diff options
| author | 2024-03-31 09:59:51 -0400 | |
|---|---|---|
| committer | 2024-03-31 09:59:51 -0400 | |
| commit | 200c617c865a5952f0bd12378802cc06ea3eb1c2 (patch) | |
| tree | 2af456d4c62a08330cf961e7237f083fc4566370 /src/Normalize.zig | |
| parent | Split out Unicode tests to separate file (diff) | |
| download | zg-200c617c865a5952f0bd12378802cc06ea3eb1c2.tar.gz zg-200c617c865a5952f0bd12378802cc06ea3eb1c2.tar.xz zg-200c617c865a5952f0bd12378802cc06ea3eb1c2.zip | |
Updated README
Diffstat (limited to 'src/Normalize.zig')
| -rw-r--r-- | src/Normalize.zig | 41 |
1 file changed, 0 insertions, 41 deletions
diff --git a/src/Normalize.zig b/src/Normalize.zig index f437f4f..85e3aa3 100644 --- a/src/Normalize.zig +++ b/src/Normalize.zig | |||
| @@ -572,47 +572,6 @@ test "eql" { | |||
| 572 | try testing.expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}")); | 572 | try testing.expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}")); |
| 573 | } | 573 | } |
| 574 | 574 | ||
| 575 | // FCD | ||
| 576 | fn getLeadCcc(self: Self, cp: u21) u8 { | ||
| 577 | const dc = self.mapping(cp, .nfd); | ||
| 578 | const dcp = if (dc.form == .same) cp else dc.cps[0]; | ||
| 579 | return self.norm_data.ccc_data.ccc(dcp); | ||
| 580 | } | ||
| 581 | |||
| 582 | fn getTrailCcc(self: Self, cp: u21) u8 { | ||
| 583 | const dc = self.mapping(cp, .nfd); | ||
| 584 | const dcp = if (dc.form == .same) cp else dc.cps[dc.cps.len - 1]; | ||
| 585 | return self.norm_data.ccc_data.ccc(dcp); | ||
| 586 | } | ||
| 587 | |||
| 588 | // Fast check to detect if a string is already in NFC or NFD form. | ||
| 589 | fn isFcd(self: Self, str: []const u8) bool { | ||
| 590 | var prev_ccc: u8 = 0; | ||
| 591 | var cp_iter = CodePointIterator{ .bytes = str }; | ||
| 592 | |||
| 593 | return while (cp_iter.next()) |cp| { | ||
| 594 | const ccc = self.getLeadCcc(cp.code); | ||
| 595 | if (ccc != 0 and ccc < prev_ccc) break false; | ||
| 596 | prev_ccc = self.getTrailCcc(cp.code); | ||
| 597 | } else true; | ||
| 598 | } | ||
| 599 | |||
| 600 | test "isFcd" { | ||
| 601 | const allocator = testing.allocator; | ||
| 602 | const data = try NormData.init(allocator); | ||
| 603 | defer data.deinit(); | ||
| 604 | const n = Self{ .norm_data = &data }; | ||
| 605 | |||
| 606 | const is_nfc = "José \u{3D3}"; | ||
| 607 | try testing.expect(n.isFcd(is_nfc)); | ||
| 608 | |||
| 609 | const is_nfd = "Jose\u{301} \u{3d2}\u{301}"; | ||
| 610 | try testing.expect(n.isFcd(is_nfd)); | ||
| 611 | |||
| 612 | const not_fcd = "Jose\u{301} \u{3d2}\u{315}\u{301}"; | ||
| 613 | try testing.expect(!n.isFcd(not_fcd)); | ||
| 614 | } | ||
| 615 | |||
| 616 | /// Returns true if `str` only contains Latin-1 Supplement | 575 | /// Returns true if `str` only contains Latin-1 Supplement |
| 617 | /// code points. Uses SIMD if possible. | 576 | /// code points. Uses SIMD if possible. |
| 618 | pub fn isLatin1Only(str: []const u8) bool { | 577 | pub fn isLatin1Only(str: []const u8) bool { |