diff options
| author | 2024-03-01 20:49:49 -0400 | |
|---|---|---|
| committer | 2024-03-01 20:49:49 -0400 | |
| commit | 68b01d794dcf145fb11603a238c647b7ca998f84 (patch) | |
| tree | 5758ec9f6622105d4df7e2990c0e4708530d44ff /src/Caser.zig | |
| parent | Moved case fold stuff to src/Caser.zig (diff) | |
| download | zg-68b01d794dcf145fb11603a238c647b7ca998f84.tar.gz zg-68b01d794dcf145fb11603a238c647b7ca998f84.tar.xz zg-68b01d794dcf145fb11603a238c647b7ca998f84.zip | |
Added canonical caseless match to Caser
Diffstat (limited to 'src/Caser.zig')
| -rw-r--r-- | src/Caser.zig | 85 |
1 files changed, 79 insertions, 6 deletions
diff --git a/src/Caser.zig b/src/Caser.zig index d02370a..43a3a5b 100644 --- a/src/Caser.zig +++ b/src/Caser.zig | |||
| @@ -50,13 +50,13 @@ pub fn compatCaselessMatch( | |||
| 50 | const nfd_a = try normalizer.nfxdCodePoints(allocator, a, .nfd); | 50 | const nfd_a = try normalizer.nfxdCodePoints(allocator, a, .nfd); |
| 51 | defer allocator.free(nfd_a); | 51 | defer allocator.free(nfd_a); |
| 52 | 52 | ||
| 53 | var need_frr_cf_nfd_a = false; | 53 | var need_free_cf_nfd_a = false; |
| 54 | var cf_nfd_a: []const u21 = nfd_a; | 54 | var cf_nfd_a: []const u21 = nfd_a; |
| 55 | if (self.changesWhenCaseFolded(nfd_a)) { | 55 | if (self.changesWhenCaseFolded(nfd_a)) { |
| 56 | cf_nfd_a = try self.caseFold(allocator, nfd_a); | 56 | cf_nfd_a = try self.caseFold(allocator, nfd_a); |
| 57 | need_frr_cf_nfd_a = true; | 57 | need_free_cf_nfd_a = true; |
| 58 | } | 58 | } |
| 59 | defer if (need_frr_cf_nfd_a) allocator.free(cf_nfd_a); | 59 | defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a); |
| 60 | 60 | ||
| 61 | const nfkd_cf_nfd_a = try normalizer.nfkdCodePoints(allocator, cf_nfd_a); | 61 | const nfkd_cf_nfd_a = try normalizer.nfkdCodePoints(allocator, cf_nfd_a); |
| 62 | defer allocator.free(nfkd_cf_nfd_a); | 62 | defer allocator.free(nfkd_cf_nfd_a); |
| @@ -69,13 +69,13 @@ pub fn compatCaselessMatch( | |||
| 69 | const nfd_b = try normalizer.nfxdCodePoints(allocator, b, .nfd); | 69 | const nfd_b = try normalizer.nfxdCodePoints(allocator, b, .nfd); |
| 70 | defer allocator.free(nfd_b); | 70 | defer allocator.free(nfd_b); |
| 71 | 71 | ||
| 72 | var need_frr_cf_nfd_b = false; | 72 | var need_free_cf_nfd_b = false; |
| 73 | var cf_nfd_b: []const u21 = nfd_b; | 73 | var cf_nfd_b: []const u21 = nfd_b; |
| 74 | if (self.changesWhenCaseFolded(nfd_b)) { | 74 | if (self.changesWhenCaseFolded(nfd_b)) { |
| 75 | cf_nfd_b = try self.caseFold(allocator, nfd_b); | 75 | cf_nfd_b = try self.caseFold(allocator, nfd_b); |
| 76 | need_frr_cf_nfd_b = true; | 76 | need_free_cf_nfd_b = true; |
| 77 | } | 77 | } |
| 78 | defer if (need_frr_cf_nfd_b) allocator.free(cf_nfd_b); | 78 | defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b); |
| 79 | 79 | ||
| 80 | const nfkd_cf_nfd_b = try normalizer.nfkdCodePoints(allocator, cf_nfd_b); | 80 | const nfkd_cf_nfd_b = try normalizer.nfkdCodePoints(allocator, cf_nfd_b); |
| 81 | defer allocator.free(nfkd_cf_nfd_b); | 81 | defer allocator.free(nfkd_cf_nfd_b); |
| @@ -107,3 +107,76 @@ test "compatCaselessMatch" { | |||
| 107 | const c = "He\u{301}llo World! \u{3d2}\u{301}"; | 107 | const c = "He\u{301}llo World! \u{3d2}\u{301}"; |
| 108 | try testing.expect(try caser.compatCaselessMatch(allocator, &n, a, c)); | 108 | try testing.expect(try caser.compatCaselessMatch(allocator, &n, a, c)); |
| 109 | } | 109 | } |
| 110 | |||
| 111 | pub fn canonCaselessMatch( | ||
| 112 | self: Self, | ||
| 113 | allocator: mem.Allocator, | ||
| 114 | normalizer: *const Normalizer, | ||
| 115 | a: []const u8, | ||
| 116 | b: []const u8, | ||
| 117 | ) !bool { | ||
| 118 | // Reports whether `a` and `b` are a canonical caseless match as defined by | ||
| 119 | // Unicode D145: NFD(toCasefold(NFD(a))) == NFD(toCasefold(NFD(b))). | ||
| 120 | // | ||
| 121 | // `normalizer` provides the NFD decompositions; `a` and `b` are the byte strings to | ||
| 122 | // compare (presumably UTF-8, as in the tests). `allocator` backs the temporary code | ||
| 123 | // point buffers, each released by a `defer` before returning. | ||
| 124 | // | ||
| 125 | // Returns true on a match. Errors from normalization or case folding propagate. | ||
| 126 | |||
| 127 | // Fast path: ASCII-only input is compared directly, ignoring ASCII case. | ||
| 128 | if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); | ||
| 129 | |||
| 130 | // Process a: start from its NFD code points. | ||
| 131 | const nfd_a = try normalizer.nfxdCodePoints(allocator, a, .nfd); | ||
| 132 | defer allocator.free(nfd_a); | ||
| 133 | |||
| 134 | // Case fold only when folding changes the text; otherwise `cf_nfd_a` aliases | ||
| 135 | // `nfd_a` and must not be freed a second time. | ||
| 136 | const fold_changes_a = self.changesWhenCaseFolded(nfd_a); | ||
| 137 | const cf_nfd_a: []const u21 = if (fold_changes_a) try self.caseFold(allocator, nfd_a) else nfd_a; | ||
| 138 | defer if (fold_changes_a) allocator.free(cf_nfd_a); | ||
| 139 | |||
| 140 | // Folded text is not guaranteed to still be in NFD, so normalize again exactly | ||
| 141 | // when folding produced a new string. Unfolded text is the NFD computed above. | ||
| 142 | const nfd_cf_nfd_a: []const u21 = if (fold_changes_a) try normalizer.nfdCodePoints(allocator, cf_nfd_a) else cf_nfd_a; | ||
| 143 | defer if (fold_changes_a) allocator.free(nfd_cf_nfd_a); | ||
| 144 | |||
| 145 | // Process b: the same three steps. | ||
| 146 | const nfd_b = try normalizer.nfxdCodePoints(allocator, b, .nfd); | ||
| 147 | defer allocator.free(nfd_b); | ||
| 148 | |||
| 149 | // Conditional case fold, as for a. | ||
| 150 | const fold_changes_b = self.changesWhenCaseFolded(nfd_b); | ||
| 151 | const cf_nfd_b: []const u21 = if (fold_changes_b) try self.caseFold(allocator, nfd_b) else nfd_b; | ||
| 152 | defer if (fold_changes_b) allocator.free(cf_nfd_b); | ||
| 153 | |||
| 154 | // Renormalize only after an actual fold, as for a. | ||
| 155 | const nfd_cf_nfd_b: []const u21 = if (fold_changes_b) try normalizer.nfdCodePoints(allocator, cf_nfd_b) else cf_nfd_b; | ||
| 156 | defer if (fold_changes_b) allocator.free(nfd_cf_nfd_b); | ||
| 157 | |||
| 158 | // Both sides now hold NFD(toCasefold(NFD(x))), so equality of the code point | ||
| 159 | // sequences decides the match. | ||
| 160 | return mem.eql(u21, nfd_cf_nfd_a, nfd_cf_nfd_b); | ||
| 161 | } | ||
| 162 | |||
| 163 | test "canonCaselessMatch" { | ||
| 164 | const gpa = testing.allocator; | ||
| 165 | // Normalization data backing the Normalizer handed to the matcher. | ||
| 166 | var norm_data = try Normalizer.NormData.init(gpa); | ||
| 167 | defer norm_data.deinit(); | ||
| 168 | const normalizer: Normalizer = .{ .norm_data = &norm_data }; | ||
| 169 | // Case folding data backing the `Self` instance under test. | ||
| 170 | var fold_data = try FoldData.init(gpa); | ||
| 171 | defer fold_data.deinit(); | ||
| 172 | const folder: Self = .{ .fold_data = &fold_data }; | ||
| 173 | // ASCII-only inputs differing only in letter case are expected to match. | ||
| 174 | try testing.expect(try folder.canonCaselessMatch(gpa, &normalizer, "ascii only!", "ASCII Only!")); | ||
| 175 | // Trailing U+03D3 versus U+03A5 U+0301: expected NOT to match. | ||
| 176 | const precomposed = "Héllo World! \u{3d3}"; | ||
| 177 | const plain_upsilon = "He\u{301}llo World! \u{3a5}\u{301}"; | ||
| 178 | try testing.expect(!try folder.canonCaselessMatch(gpa, &normalizer, precomposed, plain_upsilon)); | ||
| 179 | // Trailing U+03D3 versus U+03D2 U+0301: expected to match. | ||
| 180 | const hooked_upsilon = "He\u{301}llo World! \u{3d2}\u{301}"; | ||
| 181 | try testing.expect(try folder.canonCaselessMatch(gpa, &normalizer, precomposed, hooked_upsilon)); | ||
| 182 | } | ||