diff options
| author | 2025-05-16 16:58:40 +0000 | |
|---|---|---|
| committer | 2025-05-16 16:58:40 +0000 | |
| commit | 0b05141b033043c5f7bcd72048a48eef6531ea6c (patch) | |
| tree | af6336ad8f5f7000cbaf92180469b65a2e940e32 | |
| parent | Update README.md (diff) | |
| parent | Fix benchmarks (diff) | |
| download | zg-0b05141b033043c5f7bcd72048a48eef6531ea6c.tar.gz zg-0b05141b033043c5f7bcd72048a48eef6531ea6c.tar.xz zg-0b05141b033043c5f7bcd72048a48eef6531ea6c.zip | |
Merge pull request 'Fix benchmarks' (#56) from jacobsandlund/zg:benchmarks into v0.14-beta (refs: v0.14, v0.14-beta)
Reviewed-on: https://codeberg.org/atman/zg/pulls/56
Reviewed-by: atman <atman@noreply.codeberg.org>
| -rw-r--r-- | README.md | 84 | ||||
| -rw-r--r-- | bench/build.zig | 18 | ||||
| -rw-r--r-- | bench/build.zig.zon | 9 | ||||
| -rwxr-xr-x | bench/run_benches.sh | 5 | ||||
| -rw-r--r-- | bench/src/tests.zig | 177 | ||||
| -rw-r--r-- | bench/src/zg_case.zig | 12 | ||||
| -rw-r--r-- | bench/src/zg_caseless.zig | 15 | ||||
| -rw-r--r-- | bench/src/zg_codepoint.zig | 4 | ||||
| -rw-r--r-- | bench/src/zg_grapheme.zig | 9 | ||||
| -rw-r--r-- | bench/src/zg_normalize.zig | 4 | ||||
| -rw-r--r-- | bench/src/zg_width.zig | 3 |
11 files changed, 163 insertions, 177 deletions
| @@ -154,7 +154,7 @@ In your code: | |||
| 154 | ```zig | 154 | ```zig |
| 155 | const GeneralCategories = @import("GeneralCategories"); | 155 | const GeneralCategories = @import("GeneralCategories"); |
| 156 | 156 | ||
| 157 | test "General Category" { | 157 | test "General Categories" { |
| 158 | const gen_cat = try GeneralCategories.init(allocator); | 158 | const gen_cat = try GeneralCategories.init(allocator); |
| 159 | defer gen_cat.deinit(allocator); | 159 | defer gen_cat.deinit(allocator); |
| 160 | 160 | ||
| @@ -246,7 +246,7 @@ In your code: | |||
| 246 | ```zig | 246 | ```zig |
| 247 | const LetterCasing = @import("LetterCasing"); | 247 | const LetterCasing = @import("LetterCasing"); |
| 248 | 248 | ||
| 249 | test "Case" { | 249 | test "LetterCasing" { |
| 250 | const case = try LetterCasing.init(allocator); | 250 | const case = try LetterCasing.init(allocator); |
| 251 | defer case.deinit(allocator); | 251 | defer case.deinit(allocator); |
| 252 | 252 | ||
| @@ -309,7 +309,7 @@ In your code: | |||
| 309 | ```zig | 309 | ```zig |
| 310 | const Normalize = @import("Normalize"); | 310 | const Normalize = @import("Normalize"); |
| 311 | 311 | ||
| 312 | test "Normalization" { | 312 | test "Normalize" { |
| 313 | const normalize = try Normalize.init(allocator); | 313 | const normalize = try Normalize.init(allocator); |
| 314 | defer normalize.deinit(allocator); | 314 | defer normalize.deinit(allocator); |
| 315 | 315 | ||
| @@ -377,15 +377,15 @@ test "Caseless matching" { | |||
| 377 | // matching because it decomposes fully to NFKD. | 377 | // matching because it decomposes fully to NFKD. |
| 378 | const a = "Héllo World! \u{3d3}"; | 378 | const a = "Héllo World! \u{3d3}"; |
| 379 | const b = "He\u{301}llo World! \u{3a5}\u{301}"; | 379 | const b = "He\u{301}llo World! \u{3a5}\u{301}"; |
| 380 | try expect(try case_fold.compatCaselessMatch(allocator, &n, a, b)); | 380 | try expect(try case_fold.compatCaselessMatch(allocator, a, b)); |
| 381 | 381 | ||
| 382 | const c = "He\u{301}llo World! \u{3d2}\u{301}"; | 382 | const c = "He\u{301}llo World! \u{3d2}\u{301}"; |
| 383 | try expect(try case_fold.compatCaselessMatch(allocator, &n, a, c)); | 383 | try expect(try case_fold.compatCaselessMatch(allocator, a, c)); |
| 384 | 384 | ||
| 385 | // `canonCaselessMatch` isn't as comprehensive as `compatCaselessMatch` | 385 | // `canonCaselessMatch` isn't as comprehensive as `compatCaselessMatch` |
| 386 | // because it only decomposes to NFD. Naturally, it's faster because of this. | 386 | // because it only decomposes to NFD. Naturally, it's faster because of this. |
| 387 | try expect(!try case_fold.canonCaselessMatch(allocator, &n, a, b)); | 387 | try expect(!try case_fold.canonCaselessMatch(allocator, a, b)); |
| 388 | try expect(try case_fold.canonCaselessMatch(allocator, &n, a, c)); | 388 | try expect(try case_fold.canonCaselessMatch(allocator, a, c)); |
| 389 | } | 389 | } |
| 390 | ``` | 390 | ``` |
| 391 | Case folding needs to use the `Normalize` module in order to produce the compatibility | 391 | Case folding needs to use the `Normalize` module in order to produce the compatibility |
| @@ -536,61 +536,61 @@ Benchmarks demonstrate the above stated goals have been met: | |||
| 536 | ```plain | 536 | ```plain |
| 537 | Binary sizes ======= | 537 | Binary sizes ======= |
| 538 | 538 | ||
| 539 | 149K ziglyph_case | 539 | 172K ziglyph_case |
| 540 | 87K zg_case | 540 | 109K zg_case |
| 541 | 541 | ||
| 542 | 275K ziglyph_caseless | 542 | 299K ziglyph_caseless |
| 543 | 168K zg_caseless | 543 | 175K zg_caseless |
| 544 | 544 | ||
| 545 | 68K ziglyph_codepoint | 545 | 91K ziglyph_codepoint |
| 546 | 68K zg_codepoint | 546 | 91K zg_codepoint |
| 547 | 547 | ||
| 548 | 101K ziglyph_grapheme | 548 | 108K ziglyph_grapheme |
| 549 | 86K zg_grapheme | 549 | 109K zg_grapheme |
| 550 | 550 | ||
| 551 | 185K ziglyph_normalizer | 551 | 208K ziglyph_normalizer |
| 552 | 152K zg_normalize | 552 | 175K zg_normalize |
| 553 | 553 | ||
| 554 | 101K ziglyph_width | 554 | 124K ziglyph_width |
| 555 | 86K zg_width | 555 | 109K zg_width |
| 556 | 556 | ||
| 557 | Benchmarks ========== | 557 | Benchmarks ========== |
| 558 | 558 | ||
| 559 | Ziglyph toUpperStr/toLowerStr: result: 7911596, took: 80 | 559 | Ziglyph toUpperStr/toLowerStr: result: 7756580, took: 74 |
| 560 | Ziglyph isUpperStr/isLowerStr: result: 110959, took: 17 | 560 | Ziglyph isUpperStr/isLowerStr: result: 110959, took: 17 |
| 561 | zg toUpperStr/toLowerStr: result: 7911596, took: 62 | 561 | zg toUpperStr/toLowerStr: result: 7756580, took: 58 |
| 562 | zg isUpperStr/isLowerStr: result: 110959, took: 7 | 562 | zg isUpperStr/isLowerStr: result: 110959, took: 11 |
| 563 | 563 | ||
| 564 | Ziglyph Normalizer.eqlCaseless: result: 625, took: 500 | 564 | Ziglyph Normalizer.eqlCaseless: result: 626, took: 479 |
| 565 | zg CaseFold.canonCaselessMatch: result: 625, took: 385 | 565 | zg CaseFolding.canonCaselessMatch: result: 626, took: 296 |
| 566 | zg CaseFold.compatCaselessMatch: result: 625, took: 593 | 566 | zg CaseFolding.compatCaselessMatch: result: 626, took: 604 |
| 567 | 567 | ||
| 568 | Ziglyph CodePointIterator: result: 3769314, took: 2 | 568 | Ziglyph CodePointIterator: result: 3691806, took: 2.5 |
| 569 | zg CodePointIterator: result: 3769314, took: 3 | 569 | zg code_point.Iterator: result: 3691806, took: 3.3 |
| 570 | 570 | ||
| 571 | Ziglyph GraphemeIterator: result: 3691806, took: 48 | 571 | Ziglyph GraphemeIterator: result: 3691806, took: 78 |
| 572 | zg GraphemeIterator: result: 3691806, took: 16 | 572 | zg Graphemes.Iterator: result: 3691806, took: 31 |
| 573 | 573 | ||
| 574 | Ziglyph Normalizer.nfkc: result: 3934162, took: 416 | 574 | Ziglyph Normalizer.nfkc: result: 3856654, took: 411 |
| 575 | zg Normalize.nfkc: result: 3934162, took: 182 | 575 | zg Normalize.nfkc: result: 3856654, took: 208 |
| 576 | 576 | ||
| 577 | Ziglyph Normalizer.nfc: result: 3955798, took: 57 | 577 | Ziglyph Normalizer.nfc: result: 3878290, took: 56 |
| 578 | zg Normalize.nfc: result: 3955798, took: 28 | 578 | zg Normalize.nfc: result: 3878290, took: 31 |
| 579 | 579 | ||
| 580 | Ziglyph Normalizer.nfkd: result: 4006398, took: 172 | 580 | Ziglyph Normalizer.nfkd: result: 3928890, took: 163 |
| 581 | zg Normalize.nfkd: result: 4006398, took: 104 | 581 | zg Normalize.nfkd: result: 3928890, took: 101 |
| 582 | 582 | ||
| 583 | Ziglyph Normalizer.nfd: result: 4028034, took: 169 | 583 | Ziglyph Normalizer.nfd: result: 3950526, took: 160 |
| 584 | zg Normalize.nfd: result: 4028034, took: 104 | 584 | zg Normalize.nfd: result: 3950526, took: 101 |
| 585 | 585 | ||
| 586 | Ziglyph Normalizer.eql: result: 625, took: 337 | 586 | Ziglyph Normalizer.eql: result: 626, took: 321 |
| 587 | Zg Normalize.eql: result: 625, took: 53 | 587 | Zg Normalize.eql: result: 626, took: 60 |
| 588 | 588 | ||
| 589 | Ziglyph display_width.strWidth: result: 3700914, took: 71 | 589 | Ziglyph display_width.strWidth: result: 3700914, took: 89 |
| 590 | zg DisplayWidth.strWidth: result: 3700914, took: 24 | 590 | zg DisplayWidth.strWidth: result: 3700914, took: 46 |
| 591 | ``` | 591 | ``` |
| 592 | 592 | ||
| 593 | These results were obtained on an M1 Mac with 16 GiB of RAM. | 593 | These results were obtained on a MacBook Pro (2021) with M1 Pro and 16 GiB of RAM. |
| 594 | 594 | ||
| 595 | In contrast to Ziglyph, zg does not have: | 595 | In contrast to Ziglyph, zg does not have: |
| 596 | 596 | ||
diff --git a/bench/build.zig b/bench/build.zig index 66864c2..b7fb6af 100644 --- a/bench/build.zig +++ b/bench/build.zig | |||
| @@ -73,7 +73,7 @@ pub fn build(b: *std.Build) !void { | |||
| 73 | .name = "zg_caseless", | 73 | .name = "zg_caseless", |
| 74 | .src = "src/zg_caseless.zig", | 74 | .src = "src/zg_caseless.zig", |
| 75 | .imports = &.{ | 75 | .imports = &.{ |
| 76 | .{ .name = "CaseFold", .module = zg.module("CaseFold") }, | 76 | .{ .name = "CaseFolding", .module = zg.module("CaseFolding") }, |
| 77 | .{ .name = "Normalize", .module = zg.module("Normalize") }, | 77 | .{ .name = "Normalize", .module = zg.module("Normalize") }, |
| 78 | }, | 78 | }, |
| 79 | }, | 79 | }, |
| @@ -88,7 +88,7 @@ pub fn build(b: *std.Build) !void { | |||
| 88 | .name = "zg_grapheme", | 88 | .name = "zg_grapheme", |
| 89 | .src = "src/zg_grapheme.zig", | 89 | .src = "src/zg_grapheme.zig", |
| 90 | .imports = &.{ | 90 | .imports = &.{ |
| 91 | .{ .name = "grapheme", .module = zg.module("grapheme") }, | 91 | .{ .name = "Graphemes", .module = zg.module("Graphemes") }, |
| 92 | }, | 92 | }, |
| 93 | }, | 93 | }, |
| 94 | .{ | 94 | .{ |
| @@ -102,7 +102,7 @@ pub fn build(b: *std.Build) !void { | |||
| 102 | .name = "zg_case", | 102 | .name = "zg_case", |
| 103 | .src = "src/zg_case.zig", | 103 | .src = "src/zg_case.zig", |
| 104 | .imports = &.{ | 104 | .imports = &.{ |
| 105 | .{ .name = "CaseData", .module = zg.module("CaseData") }, | 105 | .{ .name = "LetterCasing", .module = zg.module("LetterCasing") }, |
| 106 | }, | 106 | }, |
| 107 | }, | 107 | }, |
| 108 | }; | 108 | }; |
| @@ -129,15 +129,15 @@ pub fn build(b: *std.Build) !void { | |||
| 129 | .target = target, | 129 | .target = target, |
| 130 | .optimize = optimize, | 130 | .optimize = optimize, |
| 131 | }); | 131 | }); |
| 132 | unit_tests.root_module.addImport("GenCatData", zg.module("GenCatData")); | 132 | unit_tests.root_module.addImport("GeneralCategories", zg.module("GeneralCategories")); |
| 133 | unit_tests.root_module.addImport("PropsData", zg.module("PropsData")); | 133 | unit_tests.root_module.addImport("Properties", zg.module("Properties")); |
| 134 | unit_tests.root_module.addImport("CaseData", zg.module("CaseData")); | 134 | unit_tests.root_module.addImport("LetterCasing", zg.module("LetterCasing")); |
| 135 | unit_tests.root_module.addImport("Normalize", zg.module("Normalize")); | 135 | unit_tests.root_module.addImport("Normalize", zg.module("Normalize")); |
| 136 | unit_tests.root_module.addImport("CaseFold", zg.module("CaseFold")); | 136 | unit_tests.root_module.addImport("CaseFolding", zg.module("CaseFolding")); |
| 137 | unit_tests.root_module.addImport("DisplayWidth", zg.module("DisplayWidth")); | 137 | unit_tests.root_module.addImport("DisplayWidth", zg.module("DisplayWidth")); |
| 138 | unit_tests.root_module.addImport("code_point", zg.module("code_point")); | 138 | unit_tests.root_module.addImport("code_point", zg.module("code_point")); |
| 139 | unit_tests.root_module.addImport("grapheme", zg.module("grapheme")); | 139 | unit_tests.root_module.addImport("Graphemes", zg.module("Graphemes")); |
| 140 | unit_tests.root_module.addImport("ScriptsData", zg.module("ScriptsData")); | 140 | unit_tests.root_module.addImport("Scripts", zg.module("Scripts")); |
| 141 | 141 | ||
| 142 | const run_unit_tests = b.addRunArtifact(unit_tests); | 142 | const run_unit_tests = b.addRunArtifact(unit_tests); |
| 143 | 143 | ||
diff --git a/bench/build.zig.zon b/bench/build.zig.zon index 2c56013..aec7f92 100644 --- a/bench/build.zig.zon +++ b/bench/build.zig.zon | |||
| @@ -1,12 +1,13 @@ | |||
| 1 | .{ | 1 | .{ |
| 2 | .name = "zg", | 2 | .name = .zg, |
| 3 | .version = "0.0.0", | 3 | .version = "0.0.0", |
| 4 | .minimum_zig_version = "0.12.0", | 4 | .minimum_zig_version = "0.14.0", |
| 5 | .fingerprint = 0x47df77786eba1512, | ||
| 5 | 6 | ||
| 6 | .dependencies = .{ | 7 | .dependencies = .{ |
| 7 | .ziglyph = .{ | 8 | .ziglyph = .{ |
| 8 | .url = "https://codeberg.org/dude_the_builder/ziglyph/archive/main.tar.gz", | 9 | .url = "https://codeberg.org/dgv/ziglyph/archive/main.tar.gz", |
| 9 | .hash = "12207831bce7d4abce57b5a98e8f3635811cfefd160bca022eb91fe905d36a02cf25", | 10 | .hash = "ziglyph-0.11.3-o3ARBKTuHwARGJBNZfNXfZsxxS1wCEbI745rgjYnyk4c", |
| 10 | }, | 11 | }, |
| 11 | .zg = .{ .path = ".." }, | 12 | .zg = .{ .path = ".." }, |
| 12 | }, | 13 | }, |
diff --git a/bench/run_benches.sh b/bench/run_benches.sh index 0210622..e2a26a8 100755 --- a/bench/run_benches.sh +++ b/bench/run_benches.sh | |||
| @@ -1,6 +1,9 @@ | |||
| 1 | #!/bin/sh | 1 | #!/bin/sh |
| 2 | 2 | ||
| 3 | zig build -Doptimize=ReleaseSafe | 3 | if ! zig build -Doptimize=ReleaseSafe; then |
| 4 | exit 1 | ||
| 5 | fi | ||
| 6 | |||
| 4 | cd zig-out/bin/ | 7 | cd zig-out/bin/ |
| 5 | 8 | ||
| 6 | echo "\nBinary sizes =======" | 9 | echo "\nBinary sizes =======" |
diff --git a/bench/src/tests.zig b/bench/src/tests.zig index a8a2a98..cf62709 100644 --- a/bench/src/tests.zig +++ b/bench/src/tests.zig | |||
| @@ -6,140 +6,133 @@ const expectEqualStrings = testing.expectEqualStrings; | |||
| 6 | 6 | ||
| 7 | const allocator = testing.allocator; | 7 | const allocator = testing.allocator; |
| 8 | 8 | ||
| 9 | const GenCatData = @import("GenCatData"); | 9 | const GeneralCategories = @import("GeneralCategories"); |
| 10 | 10 | ||
| 11 | test "General Category" { | 11 | test GeneralCategories { |
| 12 | const gcd = try GenCatData.init(allocator); | 12 | const gen_cat = try GeneralCategories.init(allocator); |
| 13 | defer gcd.deinit(); | 13 | defer gen_cat.deinit(allocator); |
| 14 | 14 | ||
| 15 | try expect(gcd.gc('A') == .Lu); // Lu: uppercase letter | 15 | try expect(gen_cat.gc('A') == .Lu); // Lu: uppercase letter |
| 16 | try expect(gcd.gc('3') == .Nd); // Nd: Decimal number | 16 | try expect(gen_cat.gc('3') == .Nd); // Nd: Decimal number |
| 17 | try expect(gcd.isControl(0)); | 17 | try expect(gen_cat.isControl(0)); |
| 18 | try expect(gcd.isLetter('z')); | 18 | try expect(gen_cat.isLetter('z')); |
| 19 | try expect(gcd.isMark('\u{301}')); | 19 | try expect(gen_cat.isMark('\u{301}')); |
| 20 | try expect(gcd.isNumber('3')); | 20 | try expect(gen_cat.isNumber('3')); |
| 21 | try expect(gcd.isPunctuation('[')); | 21 | try expect(gen_cat.isPunctuation('[')); |
| 22 | try expect(gcd.isSeparator(' ')); | 22 | try expect(gen_cat.isSeparator(' ')); |
| 23 | try expect(gcd.isSymbol('©')); | 23 | try expect(gen_cat.isSymbol('©')); |
| 24 | } | 24 | } |
| 25 | 25 | ||
| 26 | const PropsData = @import("PropsData"); | 26 | const Properties = @import("Properties"); |
| 27 | 27 | ||
| 28 | test "Properties" { | 28 | test Properties { |
| 29 | const pd = try PropsData.init(allocator); | 29 | const props = try Properties.init(allocator); |
| 30 | defer pd.deinit(); | 30 | defer props.deinit(allocator); |
| 31 | 31 | ||
| 32 | try expect(pd.isMath('+')); | 32 | try expect(props.isMath('+')); |
| 33 | try expect(pd.isAlphabetic('Z')); | 33 | try expect(props.isAlphabetic('Z')); |
| 34 | try expect(pd.isWhitespace(' ')); | 34 | try expect(props.isWhitespace(' ')); |
| 35 | try expect(pd.isHexDigit('f')); | 35 | try expect(props.isHexDigit('f')); |
| 36 | try expect(!pd.isHexDigit('z')); | 36 | try expect(!props.isHexDigit('z')); |
| 37 | 37 | ||
| 38 | try expect(pd.isDiacritic('\u{301}')); | 38 | try expect(props.isDiacritic('\u{301}')); |
| 39 | try expect(pd.isIdStart('Z')); // Identifier start character | 39 | try expect(props.isIdStart('Z')); // Identifier start character |
| 40 | try expect(!pd.isIdStart('1')); | 40 | try expect(!props.isIdStart('1')); |
| 41 | try expect(pd.isIdContinue('1')); | 41 | try expect(props.isIdContinue('1')); |
| 42 | try expect(pd.isXidStart('\u{b33}')); // Extended identifier start character | 42 | try expect(props.isXidStart('\u{b33}')); // Extended identifier start character |
| 43 | try expect(pd.isXidContinue('\u{e33}')); | 43 | try expect(props.isXidContinue('\u{e33}')); |
| 44 | try expect(!pd.isXidStart('1')); | 44 | try expect(!props.isXidStart('1')); |
| 45 | 45 | ||
| 46 | // Note surprising Unicode numeric types! | 46 | // Note surprising Unicode numeric types! |
| 47 | try expect(pd.isNumeric('\u{277f}')); | 47 | try expect(props.isNumeric('\u{277f}')); |
| 48 | try expect(!pd.isNumeric('3')); | 48 | try expect(!props.isNumeric('3')); |
| 49 | try expect(pd.isDigit('\u{2070}')); | 49 | try expect(props.isDigit('\u{2070}')); |
| 50 | try expect(!pd.isDigit('3')); | 50 | try expect(!props.isDigit('3')); |
| 51 | try expect(pd.isDecimal('3')); | 51 | try expect(props.isDecimal('3')); |
| 52 | } | 52 | } |
| 53 | 53 | ||
| 54 | const CaseData = @import("CaseData"); | 54 | const LetterCasing = @import("LetterCasing"); |
| 55 | 55 | ||
| 56 | test "Case" { | 56 | test LetterCasing { |
| 57 | const cd = try CaseData.init(allocator); | 57 | const case = try LetterCasing.init(allocator); |
| 58 | defer cd.deinit(); | 58 | defer case.deinit(allocator); |
| 59 | 59 | ||
| 60 | try expect(cd.isUpper('A')); | 60 | try expect(case.isUpper('A')); |
| 61 | try expect('A' == cd.toUpper('a')); | 61 | try expect('A' == case.toUpper('a')); |
| 62 | try expect(cd.isLower('a')); | 62 | try expect(case.isLower('a')); |
| 63 | try expect('a' == cd.toLower('A')); | 63 | try expect('a' == case.toLower('A')); |
| 64 | 64 | ||
| 65 | try expect(cd.isCased('É')); | 65 | try expect(case.isCased('É')); |
| 66 | try expect(!cd.isCased('3')); | 66 | try expect(!case.isCased('3')); |
| 67 | 67 | ||
| 68 | try expect(cd.isUpperStr("HELLO 123!")); | 68 | try expect(case.isUpperStr("HELLO 123!")); |
| 69 | const ucased = try cd.toUpperStr(allocator, "hello 123"); | 69 | const ucased = try case.toUpperStr(allocator, "hello 123"); |
| 70 | defer allocator.free(ucased); | 70 | defer allocator.free(ucased); |
| 71 | try expectEqualStrings("HELLO 123", ucased); | 71 | try expectEqualStrings("HELLO 123", ucased); |
| 72 | 72 | ||
| 73 | try expect(cd.isLowerStr("hello 123!")); | 73 | try expect(case.isLowerStr("hello 123!")); |
| 74 | const lcased = try cd.toLowerStr(allocator, "HELLO 123"); | 74 | const lcased = try case.toLowerStr(allocator, "HELLO 123"); |
| 75 | defer allocator.free(lcased); | 75 | defer allocator.free(lcased); |
| 76 | try expectEqualStrings("hello 123", lcased); | 76 | try expectEqualStrings("hello 123", lcased); |
| 77 | } | 77 | } |
| 78 | 78 | ||
| 79 | const Normalize = @import("Normalize"); | 79 | const Normalize = @import("Normalize"); |
| 80 | 80 | ||
| 81 | test "Normalization" { | 81 | test Normalize { |
| 82 | var norm_data = try Normalize.NormData.init(allocator); | 82 | const normalize = try Normalize.init(allocator); |
| 83 | defer norm_data.deinit(); | 83 | defer normalize.deinit(allocator); |
| 84 | const n = Normalize{ .norm_data = &norm_data }; | ||
| 85 | 84 | ||
| 86 | // NFD: Canonical decomposition | 85 | // NFD: Canonical decomposition |
| 87 | const nfd_result = try n.nfd(allocator, "Héllo World! \u{3d3}"); | 86 | const nfd_result = try normalize.nfd(allocator, "Héllo World! \u{3d3}"); |
| 88 | defer nfd_result.deinit(); | 87 | defer nfd_result.deinit(allocator); |
| 89 | try expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", nfd_result.slice); | 88 | try expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", nfd_result.slice); |
| 90 | 89 | ||
| 91 | // NFKD: Compatibility decomposition | 90 | // NFKD: Compatibility decomposition |
| 92 | const nfkd_result = try n.nfkd(allocator, "Héllo World! \u{3d3}"); | 91 | const nfkd_result = try normalize.nfkd(allocator, "Héllo World! \u{3d3}"); |
| 93 | defer nfkd_result.deinit(); | 92 | defer nfkd_result.deinit(allocator); |
| 94 | try expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", nfkd_result.slice); | 93 | try expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", nfkd_result.slice); |
| 95 | 94 | ||
| 96 | // NFC: Canonical composition | 95 | // NFC: Canonical composition |
| 97 | const nfc_result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}"); | 96 | const nfc_result = try normalize.nfc(allocator, "Complex char: \u{3D2}\u{301}"); |
| 98 | defer nfc_result.deinit(); | 97 | defer nfc_result.deinit(allocator); |
| 99 | try expectEqualStrings("Complex char: \u{3D3}", nfc_result.slice); | 98 | try expectEqualStrings("Complex char: \u{3D3}", nfc_result.slice); |
| 100 | 99 | ||
| 101 | // NFKC: Compatibility composition | 100 | // NFKC: Compatibility composition |
| 102 | const nfkc_result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}"); | 101 | const nfkc_result = try normalize.nfkc(allocator, "Complex char: \u{03A5}\u{0301}"); |
| 103 | defer nfkc_result.deinit(); | 102 | defer nfkc_result.deinit(allocator); |
| 104 | try expectEqualStrings("Complex char: \u{038E}", nfkc_result.slice); | 103 | try expectEqualStrings("Complex char: \u{038E}", nfkc_result.slice); |
| 105 | 104 | ||
| 106 | // Test for equality of two strings after normalizing to NFC. | 105 | // Test for equality of two strings after normalizing to NFC. |
| 107 | try expect(try n.eql(allocator, "foé", "foe\u{0301}")); | 106 | try expect(try normalize.eql(allocator, "foé", "foe\u{0301}")); |
| 108 | try expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}")); | 107 | try expect(try normalize.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}")); |
| 109 | } | 108 | } |
| 110 | 109 | ||
| 111 | const CaseFold = @import("CaseFold"); | 110 | const CaseFolding = @import("CaseFolding"); |
| 112 | 111 | ||
| 113 | test "Caseless matching" { | 112 | test CaseFolding { |
| 114 | var norm_data = try Normalize.NormData.init(allocator); | 113 | const case_fold = try CaseFolding.init(allocator); |
| 115 | defer norm_data.deinit(); | 114 | defer case_fold.deinit(allocator); |
| 116 | const n = Normalize{ .norm_data = &norm_data }; | ||
| 117 | |||
| 118 | const cfd = try CaseFold.FoldData.init(allocator); | ||
| 119 | defer cfd.deinit(); | ||
| 120 | const cf = CaseFold{ .fold_data = &cfd }; | ||
| 121 | 115 | ||
| 122 | // compatCaselessMatch provides the deepest level of caseless | 116 | // compatCaselessMatch provides the deepest level of caseless |
| 123 | // matching because it decomposes and composes fully to NFKC. | 117 | // matching because it decomposes and composes fully to NFKC. |
| 124 | const a = "Héllo World! \u{3d3}"; | 118 | const a = "Héllo World! \u{3d3}"; |
| 125 | const b = "He\u{301}llo World! \u{3a5}\u{301}"; | 119 | const b = "He\u{301}llo World! \u{3a5}\u{301}"; |
| 126 | try expect(try cf.compatCaselessMatch(allocator, &n, a, b)); | 120 | try expect(try case_fold.compatCaselessMatch(allocator, a, b)); |
| 127 | 121 | ||
| 128 | const c = "He\u{301}llo World! \u{3d2}\u{301}"; | 122 | const c = "He\u{301}llo World! \u{3d2}\u{301}"; |
| 129 | try expect(try cf.compatCaselessMatch(allocator, &n, a, c)); | 123 | try expect(try case_fold.compatCaselessMatch(allocator, a, c)); |
| 130 | 124 | ||
| 131 | // canonCaselessMatch isn't as comprehensive as compatCaselessMatch | 125 | // canonCaselessMatch isn't as comprehensive as compatCaselessMatch |
| 132 | // because it only decomposes and composes to NFC. But it's faster. | 126 | // because it only decomposes and composes to NFC. But it's faster. |
| 133 | try expect(!try cf.canonCaselessMatch(allocator, &n, a, b)); | 127 | try expect(!try case_fold.canonCaselessMatch(allocator, a, b)); |
| 134 | try expect(try cf.canonCaselessMatch(allocator, &n, a, c)); | 128 | try expect(try case_fold.canonCaselessMatch(allocator, a, c)); |
| 135 | } | 129 | } |
| 136 | 130 | ||
| 137 | const DisplayWidth = @import("DisplayWidth"); | 131 | const DisplayWidth = @import("DisplayWidth"); |
| 138 | 132 | ||
| 139 | test "Display width" { | 133 | test DisplayWidth { |
| 140 | const dwd = try DisplayWidth.DisplayWidthData.init(allocator); | 134 | const dw = try DisplayWidth.init(allocator); |
| 141 | defer dwd.deinit(); | 135 | defer dw.deinit(allocator); |
| 142 | const dw = DisplayWidth{ .data = &dwd }; | ||
| 143 | 136 | ||
| 144 | // String display width | 137 | // String display width |
| 145 | try expectEqual(@as(usize, 5), dw.strWidth("Hello\r\n")); | 138 | try expectEqual(@as(usize, 5), dw.strWidth("Hello\r\n")); |
| @@ -197,13 +190,13 @@ test "Code point iterator" { | |||
| 197 | } | 190 | } |
| 198 | } | 191 | } |
| 199 | 192 | ||
| 200 | const grapheme = @import("grapheme"); | 193 | const Graphemes = @import("Graphemes"); |
| 201 | 194 | ||
| 202 | test "Grapheme cluster iterator" { | 195 | test "Grapheme cluster iterator" { |
| 203 | const gd = try grapheme.GraphemeData.init(allocator); | 196 | const graphemes = try Graphemes.init(allocator); |
| 204 | defer gd.deinit(); | 197 | defer graphemes.deinit(allocator); |
| 205 | const str = "He\u{301}"; // Hé | 198 | const str = "He\u{301}"; // Hé |
| 206 | var iter = grapheme.Iterator.init(str, &gd); | 199 | var iter = graphemes.iterator(str); |
| 207 | var i: usize = 0; | 200 | var i: usize = 0; |
| 208 | 201 | ||
| 209 | while (iter.next()) |gc| : (i += 1) { | 202 | while (iter.next()) |gc| : (i += 1) { |
| @@ -217,13 +210,13 @@ test "Grapheme cluster iterator" { | |||
| 217 | } | 210 | } |
| 218 | } | 211 | } |
| 219 | 212 | ||
| 220 | const ScriptsData = @import("ScriptsData"); | 213 | const Scripts = @import("Scripts"); |
| 221 | 214 | ||
| 222 | test "Scripts" { | 215 | test Scripts { |
| 223 | const sd = try ScriptsData.init(allocator); | 216 | const scripts = try Scripts.init(allocator); |
| 224 | defer sd.deinit(); | 217 | defer scripts.deinit(allocator); |
| 225 | 218 | ||
| 226 | try expect(sd.script('A') == .Latin); | 219 | try expect(scripts.script('A') == .Latin); |
| 227 | try expect(sd.script('Ω') == .Greek); | 220 | try expect(scripts.script('Ω') == .Greek); |
| 228 | try expect(sd.script('צ') == .Hebrew); | 221 | try expect(scripts.script('צ') == .Hebrew); |
| 229 | } | 222 | } |
diff --git a/bench/src/zg_case.zig b/bench/src/zg_case.zig index 7308e82..c444343 100644 --- a/bench/src/zg_case.zig +++ b/bench/src/zg_case.zig | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | const std = @import("std"); | 1 | const std = @import("std"); |
| 2 | 2 | ||
| 3 | const CaseData = @import("CaseData"); | 3 | const LetterCasing = @import("LetterCasing"); |
| 4 | 4 | ||
| 5 | pub fn main() !void { | 5 | pub fn main() !void { |
| 6 | var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); | 6 | var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); |
| @@ -18,15 +18,15 @@ pub fn main() !void { | |||
| 18 | ); | 18 | ); |
| 19 | defer allocator.free(input); | 19 | defer allocator.free(input); |
| 20 | 20 | ||
| 21 | const case_data = try CaseData.init(allocator); | 21 | const case = try LetterCasing.init(allocator); |
| 22 | 22 | ||
| 23 | var iter = std.mem.splitScalar(u8, input, '\n'); | 23 | var iter = std.mem.splitScalar(u8, input, '\n'); |
| 24 | var result: usize = 0; | 24 | var result: usize = 0; |
| 25 | var timer = try std.time.Timer.start(); | 25 | var timer = try std.time.Timer.start(); |
| 26 | 26 | ||
| 27 | while (iter.next()) |line| { | 27 | while (iter.next()) |line| { |
| 28 | const upper = try case_data.toUpperStr(allocator, line); | 28 | const upper = try case.toUpperStr(allocator, line); |
| 29 | const lower = try case_data.toLowerStr(allocator, line); | 29 | const lower = try case.toLowerStr(allocator, line); |
| 30 | result += upper.len + lower.len; | 30 | result += upper.len + lower.len; |
| 31 | } | 31 | } |
| 32 | std.debug.print("zg toUpperStr/toLowerStr: result: {}, took: {}\n", .{ result, std.fmt.fmtDuration(timer.lap()) }); | 32 | std.debug.print("zg toUpperStr/toLowerStr: result: {}, took: {}\n", .{ result, std.fmt.fmtDuration(timer.lap()) }); |
| @@ -36,8 +36,8 @@ pub fn main() !void { | |||
| 36 | timer.reset(); | 36 | timer.reset(); |
| 37 | 37 | ||
| 38 | while (iter.next()) |line| { | 38 | while (iter.next()) |line| { |
| 39 | if (case_data.isUpperStr(line)) result += 1; | 39 | if (case.isUpperStr(line)) result += 1; |
| 40 | if (case_data.isLowerStr(line)) result += 2; | 40 | if (case.isLowerStr(line)) result += 2; |
| 41 | } | 41 | } |
| 42 | std.debug.print("zg isUpperStr/isLowerStr: result: {}, took: {}\n", .{ result, std.fmt.fmtDuration(timer.lap()) }); | 42 | std.debug.print("zg isUpperStr/isLowerStr: result: {}, took: {}\n", .{ result, std.fmt.fmtDuration(timer.lap()) }); |
| 43 | } | 43 | } |
diff --git a/bench/src/zg_caseless.zig b/bench/src/zg_caseless.zig index fa350e9..5d8d591 100644 --- a/bench/src/zg_caseless.zig +++ b/bench/src/zg_caseless.zig | |||
| @@ -1,7 +1,6 @@ | |||
| 1 | const std = @import("std"); | 1 | const std = @import("std"); |
| 2 | 2 | ||
| 3 | const CaseFold = @import("CaseFold"); | 3 | const CaseFolding = @import("CaseFolding"); |
| 4 | const Normalize = @import("Normalize"); | ||
| 5 | 4 | ||
| 6 | pub fn main() !void { | 5 | pub fn main() !void { |
| 7 | var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); | 6 | var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); |
| @@ -19,11 +18,7 @@ pub fn main() !void { | |||
| 19 | ); | 18 | ); |
| 20 | defer allocator.free(input); | 19 | defer allocator.free(input); |
| 21 | 20 | ||
| 22 | const fold_data = try CaseFold.FoldData.init(allocator); | 21 | const case_fold = try CaseFolding.init(allocator); |
| 23 | var case_fold = CaseFold{ .fold_data = &fold_data }; | ||
| 24 | var norm_data: Normalize.NormData = undefined; | ||
| 25 | try Normalize.NormData.init(&norm_data, allocator); | ||
| 26 | var normalize = Normalize{ .norm_data = &norm_data }; | ||
| 27 | 22 | ||
| 28 | var iter = std.mem.splitScalar(u8, input, '\n'); | 23 | var iter = std.mem.splitScalar(u8, input, '\n'); |
| 29 | var result: usize = 0; | 24 | var result: usize = 0; |
| @@ -34,14 +29,13 @@ pub fn main() !void { | |||
| 34 | while (iter.next()) |line| { | 29 | while (iter.next()) |line| { |
| 35 | if (try case_fold.compatCaselessMatch( | 30 | if (try case_fold.compatCaselessMatch( |
| 36 | allocator, | 31 | allocator, |
| 37 | &normalize, | ||
| 38 | prev_line, | 32 | prev_line, |
| 39 | line, | 33 | line, |
| 40 | )) result += 1; | 34 | )) result += 1; |
| 41 | @memcpy(buf[0..line.len], line); | 35 | @memcpy(buf[0..line.len], line); |
| 42 | prev_line = buf[0..line.len]; | 36 | prev_line = buf[0..line.len]; |
| 43 | } | 37 | } |
| 44 | std.debug.print("zg CaseFold.compatCaselessMatch: result: {}, took: {}\n", .{ result, std.fmt.fmtDuration(timer.lap()) }); | 38 | std.debug.print("zg CaseFolding.compatCaselessMatch: result: {}, took: {}\n", .{ result, std.fmt.fmtDuration(timer.lap()) }); |
| 45 | 39 | ||
| 46 | result = 0; | 40 | result = 0; |
| 47 | iter.reset(); | 41 | iter.reset(); |
| @@ -50,12 +44,11 @@ pub fn main() !void { | |||
| 50 | while (iter.next()) |line| { | 44 | while (iter.next()) |line| { |
| 51 | if (try case_fold.canonCaselessMatch( | 45 | if (try case_fold.canonCaselessMatch( |
| 52 | allocator, | 46 | allocator, |
| 53 | &normalize, | ||
| 54 | prev_line, | 47 | prev_line, |
| 55 | line, | 48 | line, |
| 56 | )) result += 1; | 49 | )) result += 1; |
| 57 | @memcpy(buf[0..line.len], line); | 50 | @memcpy(buf[0..line.len], line); |
| 58 | prev_line = buf[0..line.len]; | 51 | prev_line = buf[0..line.len]; |
| 59 | } | 52 | } |
| 60 | std.debug.print("zg CaseFold.canonCaselessMatch: result: {}, took: {}\n", .{ result, std.fmt.fmtDuration(timer.lap()) }); | 53 | std.debug.print("zg CaseFolding.canonCaselessMatch: result: {}, took: {}\n", .{ result, std.fmt.fmtDuration(timer.lap()) }); |
| 61 | } | 54 | } |
diff --git a/bench/src/zg_codepoint.zig b/bench/src/zg_codepoint.zig index 7a8ccbe..2fe31f3 100644 --- a/bench/src/zg_codepoint.zig +++ b/bench/src/zg_codepoint.zig | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | const std = @import("std"); | 1 | const std = @import("std"); |
| 2 | 2 | ||
| 3 | const CodePointIterator = @import("code_point").Iterator; | 3 | const code_point = @import("code_point"); |
| 4 | 4 | ||
| 5 | pub fn main() !void { | 5 | pub fn main() !void { |
| 6 | var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); | 6 | var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); |
| @@ -18,7 +18,7 @@ pub fn main() !void { | |||
| 18 | ); | 18 | ); |
| 19 | defer allocator.free(input); | 19 | defer allocator.free(input); |
| 20 | 20 | ||
| 21 | var iter = CodePointIterator{ .bytes = input }; | 21 | var iter = code_point.Iterator{ .bytes = input }; |
| 22 | var result: usize = 0; | 22 | var result: usize = 0; |
| 23 | var timer = try std.time.Timer.start(); | 23 | var timer = try std.time.Timer.start(); |
| 24 | 24 | ||
diff --git a/bench/src/zg_grapheme.zig b/bench/src/zg_grapheme.zig index 74fb8b5..ee40de5 100644 --- a/bench/src/zg_grapheme.zig +++ b/bench/src/zg_grapheme.zig | |||
| @@ -1,7 +1,6 @@ | |||
| 1 | const std = @import("std"); | 1 | const std = @import("std"); |
| 2 | 2 | ||
| 3 | const GraphemeData = @import("grapheme").GraphemeData; | 3 | const Graphemes = @import("Graphemes"); |
| 4 | const GraphemeIterator = @import("grapheme").Iterator; | ||
| 5 | 4 | ||
| 6 | pub fn main() !void { | 5 | pub fn main() !void { |
| 7 | var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); | 6 | var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); |
| @@ -19,11 +18,11 @@ pub fn main() !void { | |||
| 19 | ); | 18 | ); |
| 20 | defer allocator.free(input); | 19 | defer allocator.free(input); |
| 21 | 20 | ||
| 22 | const grapheme_data = try GraphemeData.init(allocator); | 21 | const graphemes = try Graphemes.init(allocator); |
| 23 | var iter = GraphemeIterator.init(input, &grapheme_data); | 22 | var iter = graphemes.iterator(input); |
| 24 | var result: usize = 0; | 23 | var result: usize = 0; |
| 25 | var timer = try std.time.Timer.start(); | 24 | var timer = try std.time.Timer.start(); |
| 26 | 25 | ||
| 27 | while (iter.next()) |_| result += 1; | 26 | while (iter.next()) |_| result += 1; |
| 28 | std.debug.print("zg GraphemeIterator: result: {}, took: {}\n", .{ result, std.fmt.fmtDuration(timer.lap()) }); | 27 | std.debug.print("zg Graphemes.Iterator: result: {}, took: {}\n", .{ result, std.fmt.fmtDuration(timer.lap()) }); |
| 29 | } | 28 | } |
diff --git a/bench/src/zg_normalize.zig b/bench/src/zg_normalize.zig index 1e2cfab..268c060 100644 --- a/bench/src/zg_normalize.zig +++ b/bench/src/zg_normalize.zig | |||
| @@ -18,9 +18,7 @@ pub fn main() !void { | |||
| 18 | ); | 18 | ); |
| 19 | defer allocator.free(input); | 19 | defer allocator.free(input); |
| 20 | 20 | ||
| 21 | var norm_data: Normalize.NormData = undefined; | 21 | const normalize = try Normalize.init(allocator); |
| 22 | try Normalize.NormData.init(&norm_data, allocator); | ||
| 23 | const normalize = Normalize{ .norm_data = &norm_data }; | ||
| 24 | 22 | ||
| 25 | var iter = std.mem.splitScalar(u8, input, '\n'); | 23 | var iter = std.mem.splitScalar(u8, input, '\n'); |
| 26 | var result: usize = 0; | 24 | var result: usize = 0; |
diff --git a/bench/src/zg_width.zig b/bench/src/zg_width.zig index 041d2c5..b2db3fa 100644 --- a/bench/src/zg_width.zig +++ b/bench/src/zg_width.zig | |||
| @@ -18,8 +18,7 @@ pub fn main() !void { | |||
| 18 | ); | 18 | ); |
| 19 | defer allocator.free(input); | 19 | defer allocator.free(input); |
| 20 | 20 | ||
| 21 | const width_data = try DisplayWidth.DisplayWidthData.init(allocator); | 21 | const display_width = try DisplayWidth.init(allocator); |
| 22 | const display_width = DisplayWidth{ .data = &width_data }; | ||
| 23 | 22 | ||
| 24 | var iter = std.mem.splitScalar(u8, input, '\n'); | 23 | var iter = std.mem.splitScalar(u8, input, '\n'); |
| 25 | var result: usize = 0; | 24 | var result: usize = 0; |