summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Sam Atman2026-02-06 15:11:24 -0500
committerGravatar Sam Atman2026-02-06 15:11:24 -0500
commit717a6ab80c3c64176d2e1ed29da173ba51ee77b4 (patch)
treee6e043c8379bf5337dfd48f1f8bd85b4cd4c8100
parentzg module, casing improvements (diff)
downloadzg-717a6ab80c3c64176d2e1ed29da173ba51ee77b4.tar.gz
zg-717a6ab80c3c64176d2e1ed29da173ba51ee77b4.tar.xz
zg-717a6ab80c3c64176d2e1ed29da173ba51ee77b4.zip
Allocation-free README examples
-rw-r--r--NEWS.md3
-rw-r--r--README.md334
2 files changed, 157 insertions, 180 deletions
diff --git a/NEWS.md b/NEWS.md
index ada1405..9538017 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -45,6 +45,9 @@ But feel free!
45Pro tip: use LSP superpowers to rename the instance to the name of the 45Pro tip: use LSP superpowers to rename the instance to the name of the
46module, then just delete the initializer. Couldn't be simpler. 46module, then just delete the initializer. Couldn't be simpler.
47 47
48While further breaking changes are almost certain, this is the last
49refactor of this total magnitude which `zg` is likely to see.
50
48 51
49### zg: The Module 52### zg: The Module
50 53
diff --git a/README.md b/README.md
index d858eb4..fd46cab 100644
--- a/README.md
+++ b/README.md
@@ -12,6 +12,10 @@ The Unicode version supported by zg is `16.0.0`.
12 12
13The minimum Zig version required is `0.15.2`. 13The minimum Zig version required is `0.15.2`.
14 14
15The official release of `zg 0.16` will require Zig `0.16.x`, whatever
16`x` is official this time. The last beta release will be kept around
17for those who don't want to bump Zig versions right away.
18
15 19
16## Integrating zg into your Zig Project 20## Integrating zg into your Zig Project
17 21
@@ -19,7 +23,7 @@ You first need to add zg as a dependency in your `build.zig.zon` file. In your
19Zig project's root directory, run: 23Zig project's root directory, run:
20 24
21```plain 25```plain
22zig fetch --save https://codeberg.org/atman/zg/archive/v0.15.3.tar.gz 26zig fetch --save https://codeberg.org/atman/zg/archive/v0.16.0-beta1.tar.gz
23``` 27```
24 28
25Then instantiate the dependency in your `build.zig`: 29Then instantiate the dependency in your `build.zig`:
@@ -28,36 +32,41 @@ Then instantiate the dependency in your `build.zig`:
28const zg = b.dependency("zg", .{}); 32const zg = b.dependency("zg", .{});
29``` 33```
30 34
35## Zig Module
31 36
32## A Modular Approach 37The `zg` package has classically been structured as a collection
38of mix-and-match modules. This approach is still available, just
39supplemented with a module-of-modules, also called `zg`.
33 40
34zg is a modular library. This approach minimizes binary file size and memory 41For historical reasons, many of the submodules use `TypeCase`, despite
35requirements by only including the Unicode data required for the specified module. 42the fact that they no longer require instantiation. Reflecting this,
36The following sections describe the various modules and their specific use case. 43the names of the modules in the `zg` scope are all `container_case`.
37 44
45To use in this fashion, import like so:
38 46
39### Init and Setup 47```zig
48exe.root_module.addImport("zg", zg.module("zg"));
49```
40 50
41The code examples will show the use of `Module.init(allocator)` to create the 51Rather than trying to split the difference, the README will reflect use
42various modules. All of the allocating modules have a `setup` variant, which 52of `zg` on a submodule basis. Note that any configurations discussed can
43takes a pointer and allocates in-place. 53be passed directly to the `zg` dependency import, and will reach that
54submodule accordingly.
44 55
45Example use:
46 56
47```zig 57### The Modular Approach
48test "Setup form" { 58
49 var graphemes = try allocator.create(Graphemes); 59`zg` is a modular library. This approach minimizes binary file size and
50 defer allocator.destroy(graphemes); 60memory requirements by only including the Unicode data required for the
51 try graphemes.setup(allocator); 61specified module. The following sections describe the various modules
52 defer graphemes.deinit(allocator); 62and their specific use case.
53}
54```
55 63
56 64
57## Code Points 65## Code Points
58 66
59In the `code_point` module, you'll find a data structure representing a single code 67In the `code_point` module, you'll find a data structure representing a
60point, `CodePoint`, and an `Iterator` to iterate over the code points in a string. 68single code point, `CodePoint`, and an `Iterator` to iterate over the
69code points in a string.
61 70
62In your `build.zig`: 71In your `build.zig`:
63 72
@@ -150,11 +159,8 @@ In your code:
150const Graphemes = @import("Graphemes"); 159const Graphemes = @import("Graphemes");
151 160
152test "Grapheme cluster iterator" { 161test "Grapheme cluster iterator" {
153 const graph = try Graphemes.init(allocator);
154 defer graph.deinit(allocator);
155
156 const str = "He\u{301}"; // Hé 162 const str = "He\u{301}"; // Hé
157 var iter = graph.iterator(str); 163 var iter = Graphemes.iterator(str);
158 164
159 var i: usize = 0; 165 var i: usize = 0;
160 166
@@ -176,8 +182,7 @@ test "Grapheme cluster iterator" {
176 try expectEqualStrings("e\u{301}", gc.bytes(str)); 182 try expectEqualStrings("e\u{301}", gc.bytes(str));
177 } 183 }
178 } 184 }
179} 185}```
180```
181 186
182 187
183## Words 188## Words
@@ -211,17 +216,15 @@ In your code:
211const Words = @import("Words"); 216const Words = @import("Words");
212 217
213test "Words" { 218test "Words" {
214 const wb = try Words.init(testing.allocator);
215 defer wb.deinit(testing.allocator);
216 const word_str = "Metonym Μετωνύμιο メトニム"; 219 const word_str = "Metonym Μετωνύμιο メトニム";
217 var w_iter = wb.iterator(word_str); 220 var w_iter = Words.iterator(word_str);
218 try testing.expectEqualStrings("Metonym", w_iter.next().?.bytes(word_str)); 221 try testing.expectEqualStrings("Metonym", w_iter.next().?.bytes(word_str));
219 // Spaces are "words" too! 222 // Spaces are "words" too!
220 try testing.expectEqualStrings(" ", w_iter.next().?.bytes(word_str)); 223 try testing.expectEqualStrings(" ", w_iter.next().?.bytes(word_str));
221 const in_greek = w_iter.next().?; 224 const in_greek = w_iter.next().?;
222 // wordAtIndex doesn't care if the index is valid for a codepoint: 225 // wordAtIndex doesn't care if the index is valid for a codepoint:
223 for (in_greek.offset..in_greek.offset + in_greek.len) |i| { 226 for (in_greek.offset..in_greek.offset + in_greek.len) |i| {
224 const at_index = wb.wordAtIndex(word_str, i).bytes(word_str); 227 const at_index = Words.wordAtIndex(word_str, i).bytes(word_str);
225 try testing.expectEqualStrings("Μετωνύμιο", at_index); 228 try testing.expectEqualStrings("Μετωνύμιο", at_index);
226 } 229 }
227 _ = w_iter.next(); 230 _ = w_iter.next();
@@ -231,7 +234,8 @@ test "Words" {
231 234
232## Unicode General Categories 235## Unicode General Categories
233 236
234To detect the general category for a code point, use the `GeneralCategories` module. 237To detect the general category for a code point, use the
238`GeneralCategories` module.
235 239
236In your `build.zig`: 240In your `build.zig`:
237 241
@@ -245,31 +249,29 @@ In your code:
245const GeneralCategories = @import("GeneralCategories"); 249const GeneralCategories = @import("GeneralCategories");
246 250
247test "General Categories" { 251test "General Categories" {
248 const gen_cat = try GeneralCategories.init(allocator);
249 defer gen_cat.deinit(allocator);
250
251 // The `gc` method returns the abbreviated General Category. 252 // The `gc` method returns the abbreviated General Category.
252 // These abbreviations and descriptive comments can be found 253 // These abbreviations and descriptive comments can be found
253 // in the source file `src/GenCatData.zig` as an enum. 254 // in the source file `src/GenCatData.zig` as an enum.
254 try expect(gen_cat.gc('A') == .Lu); // Lu: uppercase letter 255 try expect(GeneralCategories.gc('A') == .Lu); // Lu: uppercase letter
255 try expect(gen_cat.gc('3') == .Nd); // Nd: decimal number 256 try expect(GeneralCategories.gc('3') == .Nd); // Nd: decimal number
256 257
257 // The following are convenience methods for groups of General 258 // The following are convenience methods for groups of General
258 // Categories. For example, all letter categories start with `L`: 259 // Categories. For example, all letter categories start with `L`:
259 // Lu, Ll, Lt, Lo. 260 // Lu, Ll, Lt, Lo.
260 try expect(gen_cat.isControl(0)); 261 try expect(GeneralCategories.isControl(0));
261 try expect(gen_cat.isLetter('z')); 262 try expect(GeneralCategories.isLetter('z'));
262 try expect(gen_cat.isMark('\u{301}')); 263 try expect(GeneralCategories.isMark('\u{301}'));
263 try expect(gen_cat.isNumber('3')); 264 try expect(GeneralCategories.isNumber('3'));
264 try expect(gen_cat.isPunctuation('[')); 265 try expect(GeneralCategories.isPunctuation('['));
265 try expect(gen_cat.isSeparator(' ')); 266 try expect(GeneralCategories.isSeparator(' '));
266 try expect(gen_cat.isSymbol('©')); 267 try expect(GeneralCategories.isSymbol('©'));
267} 268}
268``` 269```
269 270
270## Unicode Properties 271## Unicode Properties
271 272
272You can detect common properties of a code point with the `Properties` module. 273You can detect common properties of a code point with the `Properties`
274module.
273 275
274In your `build.zig`: 276In your `build.zig`:
275 277
@@ -282,48 +284,46 @@ In your code:
282```zig 284```zig
283const Properties = @import("Properties"); 285const Properties = @import("Properties");
284 286
285test "Properties" { 287const Properties = @import("Properties");
286 const props = try Properties.init(allocator);
287 defer props.deinit(allocator);
288 288
289test "Properties" {
289 // Mathematical symbols and letters. 290 // Mathematical symbols and letters.
290 try expect(props.isMath('+')); 291 try expect(Properties.isMath('+'));
291 // Alphabetic only code points. 292 // Alphabetic only code points.
292 try expect(props.isAlphabetic('Z')); 293 try expect(Properties.isAlphabetic('Z'));
293 // Space, tab, and other separators. 294 // Space, tab, and other separators.
294 try expect(props.isWhitespace(' ')); 295 try expect(Properties.isWhitespace(' '));
295 // Hexadecimal digits and variations thereof. 296 // Hexadecimal digits and variations thereof.
296 try expect(props.isHexDigit('f')); 297 try expect(Properties.isHexDigit('f'));
297 try expect(!props.isHexDigit('z')); 298 try expect(!Properties.isHexDigit('z'));
298 299
299 // Accents, dieresis, and other combining marks. 300 // Accents, dieresis, and other combining marks.
300 try expect(props.isDiacritic('\u{301}')); 301 try expect(Properties.isDiacritic('\u{301}'));
301 302
302 // Unicode has a specification for valid identifiers like 303 // Unicode has a specification for valid identifiers like
303 // the ones used in programming and regular expressions. 304 // the ones used in programming and regular expressions.
304 try expect(props.isIdStart('Z')); // Identifier start character 305 try expect(Properties.isIdStart('Z')); // Identifier start character
305 try expect(!props.isIdStart('1')); 306 try expect(!Properties.isIdStart('1'));
306 try expect(props.isIdContinue('1')); 307 try expect(Properties.isIdContinue('1'));
307 308
308 // The `X` versions add some code points that can appear after 309 // The `X` versions add some code points that can appear after
309 // normalizing a string. 310 // normalizing a string.
310 try expect(props.isXidStart('\u{b33}')); // Extended identifier start character 311 try expect(Properties.isXidStart('\u{b33}')); // Extended identifier start character
311 try expect(props.isXidContinue('\u{e33}')); 312 try expect(Properties.isXidContinue('\u{e33}'));
312 try expect(!props.isXidStart('1')); 313 try expect(!Properties.isXidStart('1'));
313 314
314 // Note surprising Unicode numeric type properties! 315 // Note surprising Unicode numeric type properties!
315 try expect(props.isNumeric('\u{277f}')); 316 try expect(Properties.isNumeric('\u{277f}'));
316 try expect(!props.isNumeric('3')); // 3 is not numeric! 317 try expect(!Properties.isNumeric('3')); // 3 is not numeric!
317 try expect(props.isDigit('\u{2070}')); 318 try expect(Properties.isDigit('\u{2070}'));
318 try expect(!props.isDigit('3')); // 3 is not a digit! 319 try expect(!Properties.isDigit('3')); // 3 is not a digit!
319 try expect(props.isDecimal('3')); // 3 is a decimal digit 320 try expect(Properties.isDecimal('3')); // 3 is a decimal digit
320} 321}```
321```
322 322
323## Letter Case Detection and Conversion 323## Letter Case Detection and Conversion
324 324
325To detect and convert to and from different letter cases, use the `LetterCasing` 325To detect and convert to and from different letter cases, use the
326module. 326`LetterCasing` module.
327 327
328In your `build.zig`: 328In your `build.zig`:
329 329
@@ -337,37 +337,35 @@ In your code:
337const LetterCasing = @import("LetterCasing"); 337const LetterCasing = @import("LetterCasing");
338 338
339test "LetterCasing" { 339test "LetterCasing" {
340 const case = try LetterCasing.init(allocator);
341 defer case.deinit(allocator);
342
343 // Upper and lower case. 340 // Upper and lower case.
344 try expect(case.isUpper('A')); 341 try expect(LetterCasing.isUpper('A'));
345 try expect('A' == case.toUpper('a')); 342 try expect('A' == LetterCasing.toUpper('a'));
346 try expect(case.isLower('a')); 343 try expect(LetterCasing.isLower('a'));
347 try expect('a' == case.toLower('A')); 344 try expect('a' == LetterCasing.toLower('A'));
348 345
349 // Code points that have case. 346 // Code points that have case.
350 try expect(case.isCased('É')); 347 try expect(LetterCasing.isCased('É'));
351 try expect(!case.isCased('3')); 348 try expect(!LetterCasing.isCased('3'));
352 349
353 // Case detection and conversion for strings. 350 // Case detection and conversion for strings.
354 try expect(case.isUpperStr("HELLO 123!")); 351 try expect(LetterCasing.isUpperStr("HELLO 123!"));
355 const ucased = try case.toUpperStr(allocator, "hello 123"); 352 const ucased = try LetterCasing.toUpperStr(allocator, "hello 123");
356 defer allocator.free(ucased); 353 defer allocator.free(ucased);
357 try expectEqualStrings("HELLO 123", ucased); 354 try expectEqualStrings("HELLO 123", ucased);
358 355
359 try expect(case.isLowerStr("hello 123!")); 356 try expect(LetterCasing.isLowerStr("hello 123!"));
360 const lcased = try case.toLowerStr(allocator, "HELLO 123"); 357 const lcased = try LetterCasing.toLowerStr(allocator, "HELLO 123");
361 defer allocator.free(lcased); 358 defer allocator.free(lcased);
362 try expectEqualStrings("hello 123", lcased); 359 try expectEqualStrings("hello 123", lcased);
363} 360}
364``` 361```
365 362
363
366## Normalization 364## Normalization
367 365
368Unicode normalization is the process of converting a string into a uniform 366Unicode normalization is the process of converting a string into a
369representation that can guarantee a known structure by following a strict set 367uniform representation that can guarantee a known structure by following
370of rules. There are four normalization forms: 368a strict set of rules. There are four normalization forms:
371 369
372**Canonical Composition (NFC)** 370**Canonical Composition (NFC)**
373: The most compact representation obtained by first 371: The most compact representation obtained by first
@@ -400,52 +398,51 @@ In your code:
400const Normalize = @import("Normalize"); 398const Normalize = @import("Normalize");
401 399
402test "Normalize" { 400test "Normalize" {
403 const normalize = try Normalize.init(allocator);
404 defer normalize.deinit(allocator);
405 401
406 // NFC: Canonical composition 402 // NFC: Canonical composition
407 const nfc_result = try normalize.nfc(allocator, "Complex char: \u{3D2}\u{301}"); 403 const nfc_result = try Normalize.nfc(allocator, "Complex char: \u{3D2}\u{301}");
408 defer nfc_result.deinit(allocator); 404 defer nfc_result.deinit(allocator);
409 try expectEqualStrings("Complex char: \u{3D3}", nfc_result.slice); 405 try expectEqualStrings("Complex char: \u{3D3}", nfc_result.slice);
410 406
411 // NFKC: Compatibility composition 407 // NFKC: Compatibility composition
412 const nfkc_result = try normalize.nfkc(allocator, "Complex char: \u{03A5}\u{0301}"); 408 const nfkc_result = try Normalize.nfkc(allocator, "Complex char: \u{03A5}\u{0301}");
413 defer nfkc_result.deinit(allocator); 409 defer nfkc_result.deinit(allocator);
414 try expectEqualStrings("Complex char: \u{038E}", nfkc_result.slice); 410 try expectEqualStrings("Complex char: \u{038E}", nfkc_result.slice);
415 411
416 // NFD: Canonical decomposition 412 // NFD: Canonical decomposition
417 const nfd_result = try normalize.nfd(allocator, "Héllo World! \u{3d3}"); 413 const nfd_result = try Normalize.nfd(allocator, "Héllo World! \u{3d3}");
418 defer nfd_result.deinit(allocator); 414 defer nfd_result.deinit(allocator);
419 try expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", nfd_result.slice); 415 try expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", nfd_result.slice);
420 416
421 // NFKD: Compatibility decomposition 417 // NFKD: Compatibility decomposition
422 const nfkd_result = try normalize.nfkd(allocator, "Héllo World! \u{3d3}"); 418 const nfkd_result = try Normalize.nfkd(allocator, "Héllo World! \u{3d3}");
423 defer nfkd_result.deinit(allocator); 419 defer nfkd_result.deinit(allocator);
424 try expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", nfkd_result.slice); 420 try expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", nfkd_result.slice);
425 421
426 // Test for equality of two strings after normalizing to NFC. 422 // Test for equality of two strings after normalizing to NFC.
427 try expect(try normalize.eql(allocator, "foé", "foe\u{0301}")); 423 try expect(try Normalize.eql(allocator, "foé", "foe\u{0301}"));
428 try expect(try normalize.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}")); 424 try expect(try Normalize.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}"));
429} 425}
430``` 426```
431The `Result` returned by normalization functions may or may not be copied from the
432inputs given. For example, an all-ASCII input does not need to be a copy, and will
433be a view of the original slice. Calling `result.deinit(allocator)` will only free
434an allocated `Result`, not one which is a view. Thus it is safe to do
435unconditionally.
436 427
437This does mean that the validity of a `Result` can depend on the original string 428The `Result` returned by normalization functions may or may not be
438staying in memory. To ensure that your `Result` is always a copy, you may call 429copied from the inputs given. For example, an all-ASCII input does not
439`try result.toOwned(allocator)`, which will only make a copy if one was not 430need to be a copy, and will be a view of the original slice. Calling
440already made. 431`result.deinit(allocator)` will only free an allocated `Result`, not one
432which is a view. Thus it is safe to do unconditionally.
433
434This does mean that the validity of a `Result` can depend on the
435original string staying in memory. To ensure that your `Result` is
436always a copy, you may call `try result.toOwned(allocator)`, which will
437only make a copy if one was not already made.
441 438
442 439
443## Caseless Matching via Case Folding 440## Caseless Matching via Case Folding
444 441
445Unicode provides a more efficient way of comparing strings while ignoring letter 442Unicode provides a more efficient way of comparing strings while
446case differences: case folding. When you case fold a string, it's converted into a 443ignoring letter case differences: case folding. When you case fold
447normalized case form suitable for efficient matching. Use the `CaseFolding` module 444a string, it's converted into a normalized case form suitable for
448for this. 445efficient matching. Use the `CaseFolding` module for this.
449 446
450In your `build.zig`: 447In your `build.zig`:
451 448
@@ -459,54 +456,32 @@ In your code:
459const CaseFolding = @import("CaseFolding"); 456const CaseFolding = @import("CaseFolding");
460 457
461test "Caseless matching" { 458test "Caseless matching" {
462 // We need Unicode case fold data.
463 const case_fold = try CaseFolding.init(allocator);
464 defer case_fold.deinit(allocator);
465
466 // `compatCaselessMatch` provides the deepest level of caseless 459 // `compatCaselessMatch` provides the deepest level of caseless
467 // matching because it decomposes fully to NFKD. 460 // matching because it decomposes fully to NFKD.
468 const a = "Héllo World! \u{3d3}"; 461 const a = "Héllo World! \u{3d3}";
469 const b = "He\u{301}llo World! \u{3a5}\u{301}"; 462 const b = "He\u{301}llo World! \u{3a5}\u{301}";
470 try expect(try case_fold.compatCaselessMatch(allocator, a, b)); 463 try expect(try CaseFolding.compatCaselessMatch(allocator, a, b));
471 464
472 const c = "He\u{301}llo World! \u{3d2}\u{301}"; 465 const c = "He\u{301}llo World! \u{3d2}\u{301}";
473 try expect(try case_fold.compatCaselessMatch(allocator, a, c)); 466 try expect(try CaseFolding.compatCaselessMatch(allocator, a, c));
474 467
475 // `canonCaselessMatch` isn't as comprehensive as `compatCaselessMatch` 468 // `canonCaselessMatch` isn't as comprehensive as `compatCaselessMatch`
476 // because it only decomposes to NFD. Naturally, it's faster because of this. 469 // because it only decomposes to NFD. Naturally, it's faster because of this.
477 try expect(!try case_fold.canonCaselessMatch(allocator, a, b)); 470 try expect(!try CaseFolding.canonCaselessMatch(allocator, a, b));
478 try expect(try case_fold.canonCaselessMatch(allocator, a, c)); 471 try expect(try CaseFolding.canonCaselessMatch(allocator, a, c));
479}
480```
481Case folding needs to use the `Normalize` module in order to produce the compatibility
482forms for comparison. If you are already using a `Normalize` for other purposes,
483`CaseFolding` can borrow it:
484
485```zig
486const CaseFolding = @import("CaseFolding");
487const Normalize = @import("Normalize");
488
489test "Initialize With a Normalize" {
490 const normalize = try Normalize.init(allocator);
491 // You're responsible for freeing this:
492 defer normalize.deinit(allocator);
493 const case_fold = try CaseFolding.initWithNormalize(allocator, normalize);
494 // This will not free your normalize when it runs first.
495 defer case_fold.deinit(allocator);
496} 472}
497``` 473```
498This has a `setupWithNormalize` variant as well, note that this also takes
499a `Normalize` struct, and not a pointer to it.
500 474
501 475
502## Display Width of Characters and Strings 476## Display Width of Characters and Strings
503 477
504When displaying text with a fixed-width font on a terminal screen, it's very 478When displaying text with a fixed-width font on a terminal screen, it's
505important to know exactly how many columns or cells each character should take. 479very important to know exactly how many columns or cells each character
506Most characters will use one column, but there are many, like emoji and East- 480should take. Most characters will use one column, but there are
507Asian ideographs that need more space. The `DisplayWidth` module provides 481many, like emoji and East-Asian ideographs that need more space. The
508methods for this purpose. It also has methods that use the display width calculation 482`DisplayWidth` module provides methods for this purpose. It also has
509to `center`, `padLeft`, `padRight`, and `wrap` text. 483methods that use the display width calculation to `center`, `padLeft`,
484`padRight`, and `wrap` text.
510 485
511In your `build.zig`: 486In your `build.zig`:
512 487
@@ -520,34 +495,31 @@ In your code:
520const DisplayWidth = @import("DisplayWidth"); 495const DisplayWidth = @import("DisplayWidth");
521 496
522test "Display width" { 497test "Display width" {
523 const dw = try DisplayWidth.init(allocator);
524 defer dw.deinit(allocator);
525
526 // String display width 498 // String display width
527 try expectEqual(@as(usize, 5), dw.strWidth("Hello\r\n")); 499 try expectEqual(@as(usize, 5), DisplayWidth.strWidth("Hello\r\n"));
528 try expectEqual(@as(usize, 8), dw.strWidth("Hello 😊")); 500 try expectEqual(@as(usize, 8), DisplayWidth.strWidth("Hello 😊"));
529 try expectEqual(@as(usize, 8), dw.strWidth("Héllo 😊")); 501 try expectEqual(@as(usize, 8), DisplayWidth.strWidth("Héllo 😊"));
530 try expectEqual(@as(usize, 9), dw.strWidth("Ẓ̌á̲l͔̝̞̄̑͌g̖̘̘̔̔͢͞͝o̪̔T̢̙̫̈̍͞e̬͈͕͌̏͑x̺̍ṭ̓̓ͅ")); 502 try expectEqual(@as(usize, 9), DisplayWidth.strWidth("Ẓ̌á̲l͔̝̞̄̑͌g̖̘̘̔̔͢͞͝o̪̔T̢̙̫̈̍͞e̬͈͕͌̏͑x̺̍ṭ̓̓ͅ"));
531 try expectEqual(@as(usize, 17), dw.strWidth("슬라바 우크라이나")); 503 try expectEqual(@as(usize, 17), DisplayWidth.strWidth("슬라바 우크라이나"));
532 504
533 // Centering text 505 // Centering text
534 const centered = try dw.center(allocator, "w😊w", 10, "-"); 506 const centered = try DisplayWidth.center(allocator, "w😊w", 10, "-");
535 defer allocator.free(centered); 507 defer allocator.free(centered);
536 try expectEqualStrings("---w😊w---", centered); 508 try expectEqualStrings("---w😊w---", centered);
537 509
538 // Pad left 510 // Pad left
539 const right_aligned = try dw.padLeft(allocator, "abc", 9, "*"); 511 const right_aligned = try DisplayWidth.padLeft(allocator, "abc", 9, "*");
540 defer allocator.free(right_aligned); 512 defer allocator.free(right_aligned);
541 try expectEqualStrings("******abc", right_aligned); 513 try expectEqualStrings("******abc", right_aligned);
542 514
543 // Pad right 515 // Pad right
544 const left_aligned = try dw.padRight(allocator, "abc", 9, "*"); 516 const left_aligned = try DisplayWidth.padRight(allocator, "abc", 9, "*");
545 defer allocator.free(left_aligned); 517 defer allocator.free(left_aligned);
546 try expectEqualStrings("abc******", left_aligned); 518 try expectEqualStrings("abc******", left_aligned);
547 519
548 // Wrap text 520 // Wrap text
549 const input = "The quick brown fox\r\njumped over the lazy dog!"; 521 const input = "The quick brown fox\r\njumped over the lazy dog!";
550 const wrapped = try dw.wrap(allocator, input, 10, 3); 522 const wrapped = try DisplayWidth.wrap(allocator, input, 10, 3);
551 defer allocator.free(wrapped); 523 defer allocator.free(wrapped);
552 const want = 524 const want =
553 \\The quick 525 \\The quick
@@ -560,7 +532,8 @@ test "Display width" {
560} 532}
561``` 533```
562 534
563This module has build options. The first is `cjk`, which will consider [ambiguous characters](https://www.unicode.org/reports/tr11/tr11-6.html) as double-width. 535This module has build options. The first is `cjk`, which will consider
536[ambiguous characters][ambig] as double-width.
564 537
565To choose this option, add it to the dependency like so: 538To choose this option, add it to the dependency like so:
566 539
@@ -570,22 +543,28 @@ const zg = b.dependency("zg", .{
570}); 543});
571``` 544```
572 545
573The other options are `c0_width` and `c1_width`. The standard behavior is to treat 546The other options are `c0_width` and `c1_width`. The standard behavior
574C0 and C1 control codes as zero-width, except for delete and backspace, which are 547is to treat C0 and C1 control codes as zero-width, except for delete and
575-1 (the logic ensures that a `strWidth` is always at least 0). If printing 548backspace, which are -1 (the logic ensures that a `strWidth` is always
576control codes with replacement characters, it's necessary to assign these a width, 549at least 0). If printing control codes with replacement characters,
577hence the options. When provided these values must fit in an `i4`, this allows 550it's necessary to assign these a width, hence the options. When
578for C1s to be printed as `\u{80}` if desired. 551provided these values must fit in an `i4`, this allows for C1s to be
552printed as `\u{80}` if desired.
553
554`DisplayWidth` uses the `Graphemes` module internally.
555If you already have one, it can be borrowed using
556`DisplayWidth.initWithGraphemes(allocator, graphemes)` in the same
557fashion as shown for `CaseFolding` and `Normalize`.
579 558
580`DisplayWidth` uses the `Graphemes` module internally. If you already have one, 559
581it can be borrowed using `DisplayWidth.initWithGraphemes(allocator, graphemes)` 560[ambig]: https://www.unicode.org/reports/tr11/tr11-6.html
582in the same fashion as shown for `CaseFolding` and `Normalize`.
583 561
584## Scripts 562## Scripts
585 563
586Unicode categorizes code points by the Script in which they belong. A Script 564Unicode categorizes code points by the Script in which they belong. A
587collects letters and other symbols that belong to a particular writing system. 565Script collects letters and other symbols that belong to a particular
588You can detect the Script for a code point with the `Scripts` module. 566writing system. You can detect the Script for a code point with the
567`Scripts` module.
589 568
590In your `build.zig`: 569In your `build.zig`:
591 570
@@ -596,23 +575,21 @@ exe.root_module.addImport("Scripts", zg.module("Scripts"));
596In your code: 575In your code:
597 576
598```zig 577```zig
599const Scripts= @import("Scripts"); 578const Scripts = @import("Scripts");
600 579
601test "Scripts" { 580test "Scripts" {
602 const scripts = try Scripts.init(allocator);
603 defer scripts.deinit(allocator);
604
605 // To see the full list of Scripts, look at the 581 // To see the full list of Scripts, look at the
606 // `src/Scripts.zig` file. They are listed in an enum. 582 // `src/Scripts.zig` file. They are listed in an enum.
607 try expect(scripts.script('A') == .Latin); 583 try expect(Scripts.script('A') == .Latin);
608 try expect(scripts.script('Ω') == .Greek); 584 try expect(Scripts.script('Ω') == .Greek);
609 try expect(scripts.script('צ') == .Hebrew); 585 try expect(Scripts.script('צ') == .Hebrew);
610} 586}
611``` 587```
612 588
613## Emoji 589## Emoji
614 590
615To get information about emoji and emoji-like characters, use the `Emoji` module. 591To get information about emoji and emoji-like characters, use the
592`Emoji` module.
616 593
617In your `build.zig`: 594In your `build.zig`:
618 595
@@ -626,15 +603,12 @@ In your code:
626const Emoji = @import("Emoji"); 603const Emoji = @import("Emoji");
627 604
628test "Emoji" { 605test "Emoji" {
629 const emoji = try Emoji.init(allocator); 606 try expect(Emoji.isEmoji(0x1F415)); // 🐕
630 defer emoji.deinit(allocator); 607 try expect(Emoji.isEmojiPresentation(0x1F408)); // 🐈
631 608 try expect(Emoji.isEmojiModifier(0x1F3FF)); //
632 try expect(emoji.isEmoji(0x1F415)); // 🐕 609 try expect(Emoji.isEmojiModifierBase(0x1F977)); // 🥷
633 try expect(emoji.isEmojiPresentation(0x1F408)); // 🐈 610 try expect(Emoji.isEmojiComponent(0x1F9B0)); // 🦰
634 try expect(emoji.isEmojiModifier(0x1F3FF)); // 🏿 611 try expect(Emoji.isExtendedPictographic(0x1F005)); // 🀅
635 try expect(emoji.isEmojiModifierBase(0x1F977)); // 🥷
636 try expect(emoji.isEmojiComponent(0x1F9B0)); // 🦰
637 try expect(emoji.isExtendedPictographic(0x1F005)); // 🀅
638} 612}
639``` 613```
640 614