diff options
| author | 2025-04-30 15:59:56 -0400 | |
|---|---|---|
| committer | 2025-04-30 15:59:56 -0400 | |
| commit | 3a6cfa885697b5fecf0473b602dc38a0af0d3f7d (patch) | |
| tree | 64eacca86ec8302bf966e4731e125243067d8ba9 | |
| parent | Rest of the Renamings (diff) | |
| download | zg-3a6cfa885697b5fecf0473b602dc38a0af0d3f7d.tar.gz zg-3a6cfa885697b5fecf0473b602dc38a0af0d3f7d.tar.xz zg-3a6cfa885697b5fecf0473b602dc38a0af0d3f7d.zip | |
Update README.md to new API
| -rw-r--r-- | README.md | 216 | ||||
| -rw-r--r-- | src/DisplayWidth.zig | 20 |
2 files changed, 121 insertions, 115 deletions
| @@ -82,21 +82,20 @@ them, `Grapheme`, and an `Iterator` to iterate over them in a string. | |||
| 82 | In your `build.zig`: | 82 | In your `build.zig`: |
| 83 | 83 | ||
| 84 | ```zig | 84 | ```zig |
| 85 | exe.root_module.addImport("grapheme", zg.module("grapheme")); | 85 | exe.root_module.addImport("Graphemes", zg.module("Graphemes")); |
| 86 | ``` | 86 | ``` |
| 87 | 87 | ||
| 88 | In your code: | 88 | In your code: |
| 89 | 89 | ||
| 90 | ```zig | 90 | ```zig |
| 91 | const grapheme = @import("grapheme"); | 91 | const Graphemes = @import("Graphemes"); |
| 92 | 92 | ||
| 93 | test "Grapheme cluster iterator" { | 93 | test "Grapheme cluster iterator" { |
| 94 | // we need some Unicode data to process Grapheme Clusters. | 94 | const graph = try Graphemes.init(allocator); |
| 95 | const gd = try grapheme.GraphemeData.init(allocator); | 95 | defer graph.deinit(allocator); |
| 96 | defer gd.deinit(allocator); | ||
| 97 | 96 | ||
| 98 | const str = "He\u{301}"; // Hé | 97 | const str = "He\u{301}"; // Hé |
| 99 | var iter = grapheme.Iterator.init(str, &gd); | 98 | var iter = graph.iterator(str); |
| 100 | 99 | ||
| 101 | var i: usize = 0; | 100 | var i: usize = 0; |
| 102 | 101 | ||
| @@ -123,133 +122,133 @@ test "Grapheme cluster iterator" { | |||
| 123 | 122 | ||
| 124 | ## Unicode General Categories | 123 | ## Unicode General Categories |
| 125 | 124 | ||
| 126 | To detect the general category for a code point, use the `GenCatData` module. | 125 | To detect the general category for a code point, use the `GeneralCategories` module. |
| 127 | 126 | ||
| 128 | In your `build.zig`: | 127 | In your `build.zig`: |
| 129 | 128 | ||
| 130 | ```zig | 129 | ```zig |
| 131 | exe.root_module.addImport("GenCatData", zg.module("GenCatData")); | 130 | exe.root_module.addImport("GeneralCategories", zg.module("GeneralCategories")); |
| 132 | ``` | 131 | ``` |
| 133 | 132 | ||
| 134 | In your code: | 133 | In your code: |
| 135 | 134 | ||
| 136 | ```zig | 135 | ```zig |
| 137 | const GenCatData = @import("GenCatData"); | 136 | const GeneralCategories = @import("GeneralCategories"); |
| 138 | 137 | ||
| 139 | test "General Category" { | 138 | test "General Category" { |
| 140 | const gcd = try GenCatData.init(allocator); | 139 | const gen_cat = try GeneralCategories.init(allocator); |
| 141 | defer gcd.deinit(allocator); | 140 | defer gen_cat.deinit(allocator); |
| 142 | 141 | ||
| 143 | // The `gc` method returns the abbreviated General Category. | 142 | // The `gc` method returns the abbreviated General Category. |
| 144 | // These abbreviations and descriptive comments can be found | 143 | // These abbreviations and descriptive comments can be found |
| 145 | // in the source file `src/GenCatData.zig` as an enum. | 144 | // in the source file `src/GenCatData.zig` as an enum. |
| 146 | try expect(gcd.gc('A') == .Lu); // Lu: uppercase letter | 145 | try expect(gen_cat.gc('A') == .Lu); // Lu: uppercase letter |
| 147 | try expect(gcd.gc('3') == .Nd); // Nd: decimal number | 146 | try expect(gen_cat.gc('3') == .Nd); // Nd: decimal number |
| 148 | 147 | ||
| 149 | // The following are convenience methods for groups of General | 148 | // The following are convenience methods for groups of General |
| 150 | // Categories. For example, all letter categories start with `L`: | 149 | // Categories. For example, all letter categories start with `L`: |
| 151 | // Lu, Ll, Lt, Lo. | 150 | // Lu, Ll, Lt, Lo. |
| 152 | try expect(gcd.isControl(0)); | 151 | try expect(gen_cat.isControl(0)); |
| 153 | try expect(gcd.isLetter('z')); | 152 | try expect(gen_cat.isLetter('z')); |
| 154 | try expect(gcd.isMark('\u{301}')); | 153 | try expect(gen_cat.isMark('\u{301}')); |
| 155 | try expect(gcd.isNumber('3')); | 154 | try expect(gen_cat.isNumber('3')); |
| 156 | try expect(gcd.isPunctuation('[')); | 155 | try expect(gen_cat.isPunctuation('[')); |
| 157 | try expect(gcd.isSeparator(' ')); | 156 | try expect(gen_cat.isSeparator(' ')); |
| 158 | try expect(gcd.isSymbol('©')); | 157 | try expect(gen_cat.isSymbol('©')); |
| 159 | } | 158 | } |
| 160 | ``` | 159 | ``` |
| 161 | 160 | ||
| 162 | ## Unicode Properties | 161 | ## Unicode Properties |
| 163 | 162 | ||
| 164 | You can detect common properties of a code point with the `PropsData` module. | 163 | You can detect common properties of a code point with the `Properties` module. |
| 165 | 164 | ||
| 166 | In your `build.zig`: | 165 | In your `build.zig`: |
| 167 | 166 | ||
| 168 | ```zig | 167 | ```zig |
| 169 | exe.root_module.addImport("PropsData", zg.module("PropsData")); | 168 | exe.root_module.addImport("Properties", zg.module("Properties")); |
| 170 | ``` | 169 | ``` |
| 171 | 170 | ||
| 172 | In your code: | 171 | In your code: |
| 173 | 172 | ||
| 174 | ```zig | 173 | ```zig |
| 175 | const PropsData = @import("PropsData"); | 174 | const Properties = @import("Properties"); |
| 176 | 175 | ||
| 177 | test "Properties" { | 176 | test "Properties" { |
| 178 | const pd = try PropsData.init(allocator); | 177 | const props = try Properties.init(allocator); |
| 179 | defer pd.deinit(allocator); | 178 | defer props.deinit(allocator); |
| 180 | 179 | ||
| 181 | // Mathematical symbols and letters. | 180 | // Mathematical symbols and letters. |
| 182 | try expect(pd.isMath('+')); | 181 | try expect(props.isMath('+')); |
| 183 | // Alphabetic only code points. | 182 | // Alphabetic only code points. |
| 184 | try expect(pd.isAlphabetic('Z')); | 183 | try expect(props.isAlphabetic('Z')); |
| 185 | // Space, tab, and other separators. | 184 | // Space, tab, and other separators. |
| 186 | try expect(pd.isWhitespace(' ')); | 185 | try expect(props.isWhitespace(' ')); |
| 187 | // Hexadecimal digits and variations thereof. | 186 | // Hexadecimal digits and variations thereof. |
| 188 | try expect(pd.isHexDigit('f')); | 187 | try expect(props.isHexDigit('f')); |
| 189 | try expect(!pd.isHexDigit('z')); | 188 | try expect(!props.isHexDigit('z')); |
| 190 | 189 | ||
| 191 | // Accents, dieresis, and other combining marks. | 190 | // Accents, dieresis, and other combining marks. |
| 192 | try expect(pd.isDiacritic('\u{301}')); | 191 | try expect(props.isDiacritic('\u{301}')); |
| 193 | 192 | ||
| 194 | // Unicode has a specification for valid identifiers like | 193 | // Unicode has a specification for valid identifiers like |
| 195 | // the ones used in programming and regular expressions. | 194 | // the ones used in programming and regular expressions. |
| 196 | try expect(pd.isIdStart('Z')); // Identifier start character | 195 | try expect(props.isIdStart('Z')); // Identifier start character |
| 197 | try expect(!pd.isIdStart('1')); | 196 | try expect(!props.isIdStart('1')); |
| 198 | try expect(pd.isIdContinue('1')); | 197 | try expect(props.isIdContinue('1')); |
| 199 | 198 | ||
| 200 | // The `X` versions add some code points that can appear after | 199 | // The `X` versions add some code points that can appear after |
| 201 | // normalizing a string. | 200 | // normalizing a string. |
| 202 | try expect(pd.isXidStart('\u{b33}')); // Extended identifier start character | 201 | try expect(props.isXidStart('\u{b33}')); // Extended identifier start character |
| 203 | try expect(pd.isXidContinue('\u{e33}')); | 202 | try expect(props.isXidContinue('\u{e33}')); |
| 204 | try expect(!pd.isXidStart('1')); | 203 | try expect(!props.isXidStart('1')); |
| 205 | 204 | ||
| 206 | // Note surprising Unicode numeric type properties! | 205 | // Note surprising Unicode numeric type properties! |
| 207 | try expect(pd.isNumeric('\u{277f}')); | 206 | try expect(props.isNumeric('\u{277f}')); |
| 208 | try expect(!pd.isNumeric('3')); // 3 is not numeric! | 207 | try expect(!props.isNumeric('3')); // 3 is not numeric! |
| 209 | try expect(pd.isDigit('\u{2070}')); | 208 | try expect(props.isDigit('\u{2070}')); |
| 210 | try expect(!pd.isDigit('3')); // 3 is not a digit! | 209 | try expect(!props.isDigit('3')); // 3 is not a digit! |
| 211 | try expect(pd.isDecimal('3')); // 3 is a decimal digit | 210 | try expect(props.isDecimal('3')); // 3 is a decimal digit |
| 212 | } | 211 | } |
| 213 | ``` | 212 | ``` |
| 214 | 213 | ||
| 215 | ## Letter Case Detection and Conversion | 214 | ## Letter Case Detection and Conversion |
| 216 | 215 | ||
| 217 | To detect and convert to and from different letter cases, use the `CaseData` | 216 | To detect and convert to and from different letter cases, use the `LetterCasing` |
| 218 | module. | 217 | module. |
| 219 | 218 | ||
| 220 | In your `build.zig`: | 219 | In your `build.zig`: |
| 221 | 220 | ||
| 222 | ```zig | 221 | ```zig |
| 223 | exe.root_module.addImport("CaseData", zg.module("CaseData")); | 222 | exe.root_module.addImport("LetterCasing", zg.module("LetterCasing")); |
| 224 | ``` | 223 | ``` |
| 225 | 224 | ||
| 226 | In your code: | 225 | In your code: |
| 227 | 226 | ||
| 228 | ```zig | 227 | ```zig |
| 229 | const CaseData = @import("CaseData"); | 228 | const LetterCasing = @import("LetterCasing"); |
| 230 | 229 | ||
| 231 | test "Case" { | 230 | test "Case" { |
| 232 | const cd = try CaseData.init(allocator); | 231 | const case = try LetterCasing.init(allocator); |
| 233 | defer cd.deinit(allocator); | 232 | defer case.deinit(allocator); |
| 234 | 233 | ||
| 235 | // Upper and lower case. | 234 | // Upper and lower case. |
| 236 | try expect(cd.isUpper('A')); | 235 | try expect(case.isUpper('A')); |
| 237 | try expect('A' == cd.toUpper('a')); | 236 | try expect('A' == case.toUpper('a')); |
| 238 | try expect(cd.isLower('a')); | 237 | try expect(case.isLower('a')); |
| 239 | try expect('a' == cd.toLower('A')); | 238 | try expect('a' == case.toLower('A')); |
| 240 | 239 | ||
| 241 | // Code points that have case. | 240 | // Code points that have case. |
| 242 | try expect(cd.isCased('É')); | 241 | try expect(case.isCased('É')); |
| 243 | try expect(!cd.isCased('3')); | 242 | try expect(!case.isCased('3')); |
| 244 | 243 | ||
| 245 | // Case detection and conversion for strings. | 244 | // Case detection and conversion for strings. |
| 246 | try expect(cd.isUpperStr("HELLO 123!")); | 245 | try expect(case.isUpperStr("HELLO 123!")); |
| 247 | const ucased = try cd.toUpperStr(allocator, "hello 123"); | 246 | const ucased = try case.toUpperStr(allocator, "hello 123"); |
| 248 | defer allocator.free(ucased); | 247 | defer allocator.free(ucased); |
| 249 | try expectEqualStrings("HELLO 123", ucased); | 248 | try expectEqualStrings("HELLO 123", ucased); |
| 250 | 249 | ||
| 251 | try expect(cd.isLowerStr("hello 123!")); | 250 | try expect(case.isLowerStr("hello 123!")); |
| 252 | const lcased = try cd.toLowerStr(allocator, "HELLO 123"); | 251 | const lcased = try case.toLowerStr(allocator, "HELLO 123"); |
| 253 | defer allocator.free(lcased); | 252 | defer allocator.free(lcased); |
| 254 | try expectEqualStrings("hello 123", lcased); | 253 | try expectEqualStrings("hello 123", lcased); |
| 255 | } | 254 | } |
| @@ -292,37 +291,32 @@ In your code: | |||
| 292 | const Normalize = @import("Normalize"); | 291 | const Normalize = @import("Normalize"); |
| 293 | 292 | ||
| 294 | test "Normalization" { | 293 | test "Normalization" { |
| 295 | // We need lots of Unicode data for normalization. | 294 | const normalize = try Normalize.init(allocator); |
| 296 | var norm_data: Normalize.NormData = undefined; | 295 | defer normalize.deinit(allocator); |
| 297 | try Normalize.NormData.init(&norm_data, allocator); | ||
| 298 | defer norm_data.deinit(allocator); | ||
| 299 | |||
| 300 | // The `Normalize` structure takes a pointer to the data. | ||
| 301 | const n = Normalize{ .norm_data = &norm_data }; | ||
| 302 | 296 | ||
| 303 | // NFC: Canonical composition | 297 | // NFC: Canonical composition |
| 304 | const nfc_result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}"); | 298 | const nfc_result = try normalize.nfc(allocator, "Complex char: \u{3D2}\u{301}"); |
| 305 | defer nfc_result.deinit(allocator); | 299 | defer nfc_result.deinit(allocator); |
| 306 | try expectEqualStrings("Complex char: \u{3D3}", nfc_result.slice); | 300 | try expectEqualStrings("Complex char: \u{3D3}", nfc_result.slice); |
| 307 | 301 | ||
| 308 | // NFKC: Compatibility composition | 302 | // NFKC: Compatibility composition |
| 309 | const nfkc_result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}"); | 303 | const nfkc_result = try normalize.nfkc(allocator, "Complex char: \u{03A5}\u{0301}"); |
| 310 | defer nfkc_result.deinit(allocator); | 304 | defer nfkc_result.deinit(allocator); |
| 311 | try expectEqualStrings("Complex char: \u{038E}", nfkc_result.slice); | 305 | try expectEqualStrings("Complex char: \u{038E}", nfkc_result.slice); |
| 312 | 306 | ||
| 313 | // NFD: Canonical decomposition | 307 | // NFD: Canonical decomposition |
| 314 | const nfd_result = try n.nfd(allocator, "Héllo World! \u{3d3}"); | 308 | const nfd_result = try normalize.nfd(allocator, "Héllo World! \u{3d3}"); |
| 315 | defer nfd_result.deinit(allocator); | 309 | defer nfd_result.deinit(allocator); |
| 316 | try expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", nfd_result.slice); | 310 | try expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", nfd_result.slice); |
| 317 | 311 | ||
| 318 | // NFKD: Compatibility decomposition | 312 | // NFKD: Compatibility decomposition |
| 319 | const nfkd_result = try n.nfkd(allocator, "Héllo World! \u{3d3}"); | 313 | const nfkd_result = try normalize.nfkd(allocator, "Héllo World! \u{3d3}"); |
| 320 | defer nfkd_result.deinit(allocator); | 314 | defer nfkd_result.deinit(allocator); |
| 321 | try expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", nfkd_result.slice); | 315 | try expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", nfkd_result.slice); |
| 322 | 316 | ||
| 323 | // Test for equality of two strings after normalizing to NFC. | 317 | // Test for equality of two strings after normalizing to NFC. |
| 324 | try expect(try n.eql(allocator, "foé", "foe\u{0301}")); | 318 | try expect(try normalize.eql(allocator, "foé", "foe\u{0301}")); |
| 325 | try expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}")); | 319 | try expect(try normalize.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}")); |
| 326 | } | 320 | } |
| 327 | ``` | 321 | ``` |
| 328 | The `Result` returned by normalization functions may or may not be copied from the | 322 | The `Result` returned by normalization functions may or may not be copied from the |
| @@ -347,46 +341,53 @@ for this. | |||
| 347 | In your `build.zig`: | 341 | In your `build.zig`: |
| 348 | 342 | ||
| 349 | ```zig | 343 | ```zig |
| 350 | exe.root_module.addImport("Normalize", zg.module("Normalize")); | 344 | exe.root_module.addImport("CaseFolding", zg.module("CaseFolding")); |
| 351 | exe.root_module.addImport("CaseFold", zg.module("CaseFold")); | ||
| 352 | ``` | 345 | ``` |
| 353 | 346 | ||
| 354 | In your code: | 347 | In your code: |
| 355 | 348 | ||
| 356 | ```zig | 349 | ```zig |
| 357 | const Normalize = @import("Normalize"); | 350 | const CaseFolding = @import("CaseFolding"); |
| 358 | const CaseFold = @import("CaseFold"); | ||
| 359 | 351 | ||
| 360 | test "Caseless matching" { | 352 | test "Caseless matching" { |
| 361 | // We need to normalize during the matching process. | ||
| 362 | var norm_data: Normalize.NormData = undefined; | ||
| 363 | try Normalize.NormData.init(&norm_data, allocator); | ||
| 364 | defer norm_data.deinit(allocator); | ||
| 365 | const n = Normalize{ .norm_data = &norm_data }; | ||
| 366 | |||
| 367 | // We need Unicode case fold data. | 353 | // We need Unicode case fold data. |
| 368 | const cfd = try CaseFold.FoldData.init(allocator); | 354 | const case_fold = try CaseFolding.init(allocator); |
| 369 | defer cfd.deinit(allocator); | 355 | defer case_fold.deinit(allocator); |
| 370 | |||
| 371 | // The `CaseFold` structure takes a pointer to the data. | ||
| 372 | const cf = CaseFold{ .fold_data = &cfd }; | ||
| 373 | 356 | ||
| 374 | // `compatCaselessMatch` provides the deepest level of caseless | 357 | // `compatCaselessMatch` provides the deepest level of caseless |
| 375 | // matching because it decomposes fully to NFKD. | 358 | // matching because it decomposes fully to NFKD. |
| 376 | const a = "Héllo World! \u{3d3}"; | 359 | const a = "Héllo World! \u{3d3}"; |
| 377 | const b = "He\u{301}llo World! \u{3a5}\u{301}"; | 360 | const b = "He\u{301}llo World! \u{3a5}\u{301}"; |
| 378 | try expect(try cf.compatCaselessMatch(allocator, &n, a, b)); | 361 | try expect(try case_fold.compatCaselessMatch(allocator, a, b)); |
| 379 | 362 | ||
| 380 | const c = "He\u{301}llo World! \u{3d2}\u{301}"; | 363 | const c = "He\u{301}llo World! \u{3d2}\u{301}"; |
| 381 | try expect(try cf.compatCaselessMatch(allocator, &n, a, c)); | 364 | try expect(try case_fold.compatCaselessMatch(allocator, a, c)); |
| 382 | 365 | ||
| 383 | // `canonCaselessMatch` isn't as comprehensive as `compatCaselessMatch` | 366 | // `canonCaselessMatch` isn't as comprehensive as `compatCaselessMatch` |
| 384 | // because it only decomposes to NFD. Naturally, it's faster because of this. | 367 | // because it only decomposes to NFD. Naturally, it's faster because of this. |
| 385 | try expect(!try cf.canonCaselessMatch(allocator, &n, a, b)); | 368 | try expect(!try case_fold.canonCaselessMatch(allocator, a, b)); |
| 386 | try expect(try cf.canonCaselessMatch(allocator, &n, a, c)); | 369 | try expect(try case_fold.canonCaselessMatch(allocator, a, c)); |
| 370 | } | ||
| 371 | ``` | ||
| 372 | Case folding needs to use the `Normalize` module in order to produce the compatibility | ||
| 373 | forms for comparison. If you are already using a `Normalize` for other purposes, | ||
| 374 | `CaseFolding` can borrow it: | ||
| 375 | |||
| 376 | ```zig | ||
| 377 | const CaseFolding = @import("CaseFolding"); | ||
| 378 | const Normalize = @import("Normalize"); | ||
| 379 | |||
| 380 | test "Initialize With a Normalize" { | ||
| 381 | const normalize = try Normalize.init(allocator); | ||
| 382 | // You're responsible for freeing this: | ||
| 383 | defer normalize.deinit(allocator); | ||
| 384 | const case_fold = try CaseFolding.initWithNormalize(allocator, normalize); | ||
| 385 | // This will not free your normalize when it runs first. | ||
| 386 | defer case_fold.deinit(allocator); | ||
| 387 | } | 387 | } |
| 388 | ``` | 388 | ``` |
| 389 | 389 | ||
| 390 | |||
| 390 | ## Display Width of Characters and Strings | 391 | ## Display Width of Characters and Strings |
| 391 | 392 | ||
| 392 | When displaying text with a fixed-width font on a terminal screen, it's very | 393 | When displaying text with a fixed-width font on a terminal screen, it's very |
| @@ -408,12 +409,8 @@ In your code: | |||
| 408 | const DisplayWidth = @import("DisplayWidth"); | 409 | const DisplayWidth = @import("DisplayWidth"); |
| 409 | 410 | ||
| 410 | test "Display width" { | 411 | test "Display width" { |
| 411 | // We need Unicode data for display width calculation. | 412 | const dw = try DisplayWidth.init(allocator); |
| 412 | const dwd = try DisplayWidth.DisplayWidthData.init(allocator); | 413 | defer dw.deinit(allocator); |
| 413 | defer dwd.deinit(allocator); | ||
| 414 | |||
| 415 | // The `DisplayWidth` structure takes a pointer to the data. | ||
| 416 | const dw = DisplayWidth{ .data = &dwd }; | ||
| 417 | 414 | ||
| 418 | // String display width | 415 | // String display width |
| 419 | try expectEqual(@as(usize, 5), dw.strWidth("Hello\r\n")); | 416 | try expectEqual(@as(usize, 5), dw.strWidth("Hello\r\n")); |
| @@ -462,34 +459,43 @@ const zg = b.dependency("zg", .{ | |||
| 462 | }); | 459 | }); |
| 463 | ``` | 460 | ``` |
| 464 | 461 | ||
| 465 | The other options are `c0_width` and `c1_width`. The standard behavior is to treat C0 and C1 control codes as zero-width, except for delete and backspace, which are -1 (the logic ensures that a `strWidth` is always at least 0). If printing control codes with replacement characters, it's necessary to assign these a width, hence the options. When provided these values must fit in an `i4`, this allows for C1s to be printed as `\u{80}` if desired. | 462 | The other options are `c0_width` and `c1_width`. The standard behavior is to treat |
| 463 | C0 and C1 control codes as zero-width, except for delete and backspace, which are | ||
| 464 | -1 (the logic ensures that a `strWidth` is always at least 0). If printing | ||
| 465 | control codes with replacement characters, it's necessary to assign these a width, | ||
| 466 | hence the options. When provided, these values must fit in an `i4`; this allows | ||
| 467 | for C1s to be printed as `\u{80}` if desired. | ||
| 468 | |||
| 469 | `DisplayWidth` uses the `Graphemes` module internally. If you already have one, | ||
| 470 | it can be borrowed using `DisplayWidth.initWithGraphemes(allocator, graphemes)` | ||
| 471 | in the same fashion as shown for `CaseFolding` and `Normalize`. | ||
| 466 | 472 | ||
| 467 | ## Scripts | 473 | ## Scripts |
| 468 | 474 | ||
| 469 | Unicode categorizes code points by the Script in which they belong. A Script | 475 | Unicode categorizes code points by the Script in which they belong. A Script |
| 470 | collects letters and other symbols that belong to a particular writing system. | 476 | collects letters and other symbols that belong to a particular writing system. |
| 471 | You can detect the Script for a code point with the `ScriptsData` module. | 477 | You can detect the Script for a code point with the `Scripts` module. |
| 472 | 478 | ||
| 473 | In your `build.zig`: | 479 | In your `build.zig`: |
| 474 | 480 | ||
| 475 | ```zig | 481 | ```zig |
| 476 | exe.root_module.addImport("ScriptsData", zg.module("ScriptsData")); | 482 | exe.root_module.addImport("Scripts", zg.module("Scripts")); |
| 477 | ``` | 483 | ``` |
| 478 | 484 | ||
| 479 | In your code: | 485 | In your code: |
| 480 | 486 | ||
| 481 | ```zig | 487 | ```zig |
| 482 | const ScriptsData = @import("ScriptsData"); | 488 | const Scripts = @import("Scripts"); |
| 483 | 489 | ||
| 484 | test "Scripts" { | 490 | test "Scripts" { |
| 485 | const sd = try ScriptsData.init(allocator); | 491 | const scripts = try Scripts.init(allocator); |
| 486 | defer sd.deinit(allocator); | 492 | defer scripts.deinit(allocator); |
| 487 | 493 | ||
| 488 | // To see the full list of Scripts, look at the | 494 | // To see the full list of Scripts, look at the |
| 489 | // `src/ScriptsData.zig` file. They are listed in an enum. | 495 | // `src/Scripts.zig` file. They are listed in an enum. |
| 490 | try expect(sd.script('A') == .Latin); | 496 | try expect(scripts.script('A') == .Latin); |
| 491 | try expect(sd.script('Ω') == .Greek); | 497 | try expect(scripts.script('Ω') == .Greek); |
| 492 | try expect(sd.script('צ') == .Hebrew); | 498 | try expect(scripts.script('צ') == .Hebrew); |
| 493 | } | 499 | } |
| 494 | ``` | 500 | ``` |
| 495 | 501 | ||
diff --git a/src/DisplayWidth.zig b/src/DisplayWidth.zig index 11ec59e..c0d6d96 100644 --- a/src/DisplayWidth.zig +++ b/src/DisplayWidth.zig | |||
| @@ -13,10 +13,10 @@ pub const DisplayWidthData = @import("DisplayWidthData"); | |||
| 13 | 13 | ||
| 14 | const Graphemes = @import("Graphemes"); | 14 | const Graphemes = @import("Graphemes"); |
| 15 | 15 | ||
| 16 | g_data: Graphemes, | 16 | graphemes: Graphemes, |
| 17 | s1: []u16 = undefined, | 17 | s1: []u16 = undefined, |
| 18 | s2: []i4 = undefined, | 18 | s2: []i4 = undefined, |
| 19 | owns_gdata: bool, | 19 | owns_graphemes: bool, |
| 20 | 20 | ||
| 21 | const DisplayWidth = @This(); | 21 | const DisplayWidth = @This(); |
| 22 | 22 | ||
| @@ -26,16 +26,16 @@ pub fn init(allocator: mem.Allocator) mem.Allocator.Error!DisplayWidth { | |||
| 26 | allocator.free(dw.s1); | 26 | allocator.free(dw.s1); |
| 27 | allocator.free(dw.s2); | 27 | allocator.free(dw.s2); |
| 28 | } | 28 | } |
| 29 | dw.owns_gdata = true; | 29 | dw.owns_graphemes = true; |
| 30 | dw.g_data = try Graphemes.init(allocator); | 30 | dw.graphemes = try Graphemes.init(allocator); |
| 31 | errdefer dw.g_data.deinit(allocator); | 31 | errdefer dw.graphemes.deinit(allocator); |
| 32 | return dw; | 32 | return dw; |
| 33 | } | 33 | } |
| 34 | 34 | ||
| 35 | pub fn initWithGraphemeData(allocator: mem.Allocator, g_data: Graphemes) mem.Allocator.Error!DisplayWidth { | 35 | pub fn initWithGraphemes(allocator: mem.Allocator, graphemes: Graphemes) mem.Allocator.Error!DisplayWidth { |
| 36 | var dw = try DisplayWidth.setup(allocator); | 36 | var dw = try DisplayWidth.setup(allocator); |
| 37 | dw.g_data = g_data; | 37 | dw.graphemes = graphemes; |
| 38 | dw.owns_gdata = false; | 38 | dw.owns_graphemes = false; |
| 39 | return dw; | 39 | return dw; |
| 40 | } | 40 | } |
| 41 | 41 | ||
| @@ -67,7 +67,7 @@ fn setup(allocator: mem.Allocator) mem.Allocator.Error!DisplayWidth { | |||
| 67 | pub fn deinit(dw: *const DisplayWidth, allocator: mem.Allocator) void { | 67 | pub fn deinit(dw: *const DisplayWidth, allocator: mem.Allocator) void { |
| 68 | allocator.free(dw.s1); | 68 | allocator.free(dw.s1); |
| 69 | allocator.free(dw.s2); | 69 | allocator.free(dw.s2); |
| 70 | if (dw.owns_gdata) dw.g_data.deinit(allocator); | 70 | if (dw.owns_graphemes) dw.graphemes.deinit(allocator); |
| 71 | } | 71 | } |
| 72 | 72 | ||
| 73 | /// codePointWidth returns the number of cells `cp` requires when rendered | 73 | /// codePointWidth returns the number of cells `cp` requires when rendered |
| @@ -119,7 +119,7 @@ pub fn strWidth(dw: DisplayWidth, str: []const u8) usize { | |||
| 119 | return @intCast(@max(0, total)); | 119 | return @intCast(@max(0, total)); |
| 120 | } | 120 | } |
| 121 | 121 | ||
| 122 | var giter = dw.g_data.iterator(str); | 122 | var giter = dw.graphemes.iterator(str); |
| 123 | 123 | ||
| 124 | while (giter.next()) |gc| { | 124 | while (giter.next()) |gc| { |
| 125 | var cp_iter = CodePointIterator{ .bytes = gc.bytes(str) }; | 125 | var cp_iter = CodePointIterator{ .bytes = gc.bytes(str) }; |