diff options
| author | 2026-02-05 07:07:40 -0500 | |
|---|---|---|
| committer | 2026-02-05 07:07:40 -0500 | |
| commit | 95f9487f6a7bde2d7266399bdf6843b97cc1b301 (patch) | |
| tree | 122cd20fa574861e807844974b49eb2f91285d3c /src/CaseFolding.zig | |
| parent | Teasing out canonicalization (diff) | |
| download | zg-95f9487f6a7bde2d7266399bdf6843b97cc1b301.tar.gz zg-95f9487f6a7bde2d7266399bdf6843b97cc1b301.tar.xz zg-95f9487f6a7bde2d7266399bdf6843b97cc1b301.zip | |
Base units do not allocate
This includes CanonData. I may still sort out caseless matching without
allocation, but that's a stretch goal.
Closes #86
Closes #85
Diffstat (limited to 'src/CaseFolding.zig')
| -rw-r--r-- | src/CaseFolding.zig | 40 |
1 files changed, 16 insertions, 24 deletions
diff --git a/src/CaseFolding.zig b/src/CaseFolding.zig index 88f047c..d69cddc 100644 --- a/src/CaseFolding.zig +++ b/src/CaseFolding.zig | |||
| @@ -100,14 +100,13 @@ fn isCwcfException(cp: u21) bool { | |||
| 100 | /// comprehensive comparison possible, but slower than `canonCaselessMatch`. | 100 | /// comprehensive comparison possible, but slower than `canonCaselessMatch`. |
| 101 | pub fn compatCaselessMatch( | 101 | pub fn compatCaselessMatch( |
| 102 | allocator: Allocator, | 102 | allocator: Allocator, |
| 103 | normalize: Normalize, | ||
| 104 | a: []const u8, | 103 | a: []const u8, |
| 105 | b: []const u8, | 104 | b: []const u8, |
| 106 | ) Allocator.Error!bool { | 105 | ) Allocator.Error!bool { |
| 107 | if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); | 106 | if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); |
| 108 | 107 | ||
| 109 | // Process a | 108 | // Process a |
| 110 | const nfd_a = try normalize.nfxdCodePoints(allocator, a, .nfd); | 109 | const nfd_a = try Normalize.nfxdCodePoints(allocator, a, .nfd); |
| 111 | defer allocator.free(nfd_a); | 110 | defer allocator.free(nfd_a); |
| 112 | 111 | ||
| 113 | var need_free_cf_nfd_a = false; | 112 | var need_free_cf_nfd_a = false; |
| @@ -118,15 +117,15 @@ pub fn compatCaselessMatch( | |||
| 118 | } | 117 | } |
| 119 | defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a); | 118 | defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a); |
| 120 | 119 | ||
| 121 | const nfkd_cf_nfd_a = try normalize.nfkdCodePoints(allocator, cf_nfd_a); | 120 | const nfkd_cf_nfd_a = try Normalize.nfkdCodePoints(allocator, cf_nfd_a); |
| 122 | defer allocator.free(nfkd_cf_nfd_a); | 121 | defer allocator.free(nfkd_cf_nfd_a); |
| 123 | const cf_nfkd_cf_nfd_a = try CaseFolding.caseFoldAlloc(allocator, nfkd_cf_nfd_a); | 122 | const cf_nfkd_cf_nfd_a = try CaseFolding.caseFoldAlloc(allocator, nfkd_cf_nfd_a); |
| 124 | defer allocator.free(cf_nfkd_cf_nfd_a); | 123 | defer allocator.free(cf_nfkd_cf_nfd_a); |
| 125 | const nfkd_cf_nfkd_cf_nfd_a = try normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_a); | 124 | const nfkd_cf_nfkd_cf_nfd_a = try Normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_a); |
| 126 | defer allocator.free(nfkd_cf_nfkd_cf_nfd_a); | 125 | defer allocator.free(nfkd_cf_nfkd_cf_nfd_a); |
| 127 | 126 | ||
| 128 | // Process b | 127 | // Process b |
| 129 | const nfd_b = try normalize.nfxdCodePoints(allocator, b, .nfd); | 128 | const nfd_b = try Normalize.nfxdCodePoints(allocator, b, .nfd); |
| 130 | defer allocator.free(nfd_b); | 129 | defer allocator.free(nfd_b); |
| 131 | 130 | ||
| 132 | var need_free_cf_nfd_b = false; | 131 | var need_free_cf_nfd_b = false; |
| @@ -137,11 +136,11 @@ pub fn compatCaselessMatch( | |||
| 137 | } | 136 | } |
| 138 | defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b); | 137 | defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b); |
| 139 | 138 | ||
| 140 | const nfkd_cf_nfd_b = try normalize.nfkdCodePoints(allocator, cf_nfd_b); | 139 | const nfkd_cf_nfd_b = try Normalize.nfkdCodePoints(allocator, cf_nfd_b); |
| 141 | defer allocator.free(nfkd_cf_nfd_b); | 140 | defer allocator.free(nfkd_cf_nfd_b); |
| 142 | const cf_nfkd_cf_nfd_b = try CaseFolding.caseFoldAlloc(allocator, nfkd_cf_nfd_b); | 141 | const cf_nfkd_cf_nfd_b = try CaseFolding.caseFoldAlloc(allocator, nfkd_cf_nfd_b); |
| 143 | defer allocator.free(cf_nfkd_cf_nfd_b); | 142 | defer allocator.free(cf_nfkd_cf_nfd_b); |
| 144 | const nfkd_cf_nfkd_cf_nfd_b = try normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_b); | 143 | const nfkd_cf_nfkd_cf_nfd_b = try Normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_b); |
| 145 | defer allocator.free(nfkd_cf_nfkd_cf_nfd_b); | 144 | defer allocator.free(nfkd_cf_nfkd_cf_nfd_b); |
| 146 | 145 | ||
| 147 | return mem.eql(u21, nfkd_cf_nfkd_cf_nfd_a, nfkd_cf_nfkd_cf_nfd_b); | 146 | return mem.eql(u21, nfkd_cf_nfkd_cf_nfd_a, nfkd_cf_nfkd_cf_nfd_b); |
| @@ -176,31 +175,27 @@ test "caseFold" { | |||
| 176 | test "compatCaselessMatch" { | 175 | test "compatCaselessMatch" { |
| 177 | const allocator = testing.allocator; | 176 | const allocator = testing.allocator; |
| 178 | 177 | ||
| 179 | var normalize = try Normalize.init(allocator); | 178 | try testing.expect(try compatCaselessMatch(allocator, "ascii only!", "ASCII Only!")); |
| 180 | defer normalize.deinit(allocator); | ||
| 181 | |||
| 182 | try testing.expect(try compatCaselessMatch(allocator, normalize, "ascii only!", "ASCII Only!")); | ||
| 183 | 179 | ||
| 184 | const a = "Héllo World! \u{3d3}"; | 180 | const a = "Héllo World! \u{3d3}"; |
| 185 | const b = "He\u{301}llo World! \u{3a5}\u{301}"; | 181 | const b = "He\u{301}llo World! \u{3a5}\u{301}"; |
| 186 | try testing.expect(try compatCaselessMatch(allocator, normalize, a, b)); | 182 | try testing.expect(try compatCaselessMatch(allocator, a, b)); |
| 187 | 183 | ||
| 188 | const c = "He\u{301}llo World! \u{3d2}\u{301}"; | 184 | const c = "He\u{301}llo World! \u{3d2}\u{301}"; |
| 189 | try testing.expect(try compatCaselessMatch(allocator, normalize, a, c)); | 185 | try testing.expect(try compatCaselessMatch(allocator, a, c)); |
| 190 | } | 186 | } |
| 191 | 187 | ||
| 192 | /// Performs canonical caseless string matching by decomposing to NFD. This is | 188 | /// Performs canonical caseless string matching by decomposing to NFD. This is |
| 193 | /// faster than `compatCaselessMatch`, but less comprehensive. | 189 | /// faster than `compatCaselessMatch`, but less comprehensive. |
| 194 | pub fn canonCaselessMatch( | 190 | pub fn canonCaselessMatch( |
| 195 | allocator: Allocator, | 191 | allocator: Allocator, |
| 196 | normalize: Normalize, | ||
| 197 | a: []const u8, | 192 | a: []const u8, |
| 198 | b: []const u8, | 193 | b: []const u8, |
| 199 | ) Allocator.Error!bool { | 194 | ) Allocator.Error!bool { |
| 200 | if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); | 195 | if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); |
| 201 | 196 | ||
| 202 | // Process a | 197 | // Process a |
| 203 | const nfd_a = try normalize.nfxdCodePoints(allocator, a, .nfd); | 198 | const nfd_a = try Normalize.nfxdCodePoints(allocator, a, .nfd); |
| 204 | defer allocator.free(nfd_a); | 199 | defer allocator.free(nfd_a); |
| 205 | 200 | ||
| 206 | var need_free_cf_nfd_a = false; | 201 | var need_free_cf_nfd_a = false; |
| @@ -214,13 +209,13 @@ pub fn canonCaselessMatch( | |||
| 214 | var need_free_nfd_cf_nfd_a = false; | 209 | var need_free_nfd_cf_nfd_a = false; |
| 215 | var nfd_cf_nfd_a = cf_nfd_a; | 210 | var nfd_cf_nfd_a = cf_nfd_a; |
| 216 | if (!need_free_cf_nfd_a) { | 211 | if (!need_free_cf_nfd_a) { |
| 217 | nfd_cf_nfd_a = try normalize.nfdCodePoints(allocator, cf_nfd_a); | 212 | nfd_cf_nfd_a = try Normalize.nfdCodePoints(allocator, cf_nfd_a); |
| 218 | need_free_nfd_cf_nfd_a = true; | 213 | need_free_nfd_cf_nfd_a = true; |
| 219 | } | 214 | } |
| 220 | defer if (need_free_nfd_cf_nfd_a) allocator.free(nfd_cf_nfd_a); | 215 | defer if (need_free_nfd_cf_nfd_a) allocator.free(nfd_cf_nfd_a); |
| 221 | 216 | ||
| 222 | // Process b | 217 | // Process b |
| 223 | const nfd_b = try normalize.nfxdCodePoints(allocator, b, .nfd); | 218 | const nfd_b = try Normalize.nfxdCodePoints(allocator, b, .nfd); |
| 224 | defer allocator.free(nfd_b); | 219 | defer allocator.free(nfd_b); |
| 225 | 220 | ||
| 226 | var need_free_cf_nfd_b = false; | 221 | var need_free_cf_nfd_b = false; |
| @@ -234,7 +229,7 @@ pub fn canonCaselessMatch( | |||
| 234 | var need_free_nfd_cf_nfd_b = false; | 229 | var need_free_nfd_cf_nfd_b = false; |
| 235 | var nfd_cf_nfd_b = cf_nfd_b; | 230 | var nfd_cf_nfd_b = cf_nfd_b; |
| 236 | if (!need_free_cf_nfd_b) { | 231 | if (!need_free_cf_nfd_b) { |
| 237 | nfd_cf_nfd_b = try normalize.nfdCodePoints(allocator, cf_nfd_b); | 232 | nfd_cf_nfd_b = try Normalize.nfdCodePoints(allocator, cf_nfd_b); |
| 238 | need_free_nfd_cf_nfd_b = true; | 233 | need_free_nfd_cf_nfd_b = true; |
| 239 | } | 234 | } |
| 240 | defer if (need_free_nfd_cf_nfd_b) allocator.free(nfd_cf_nfd_b); | 235 | defer if (need_free_nfd_cf_nfd_b) allocator.free(nfd_cf_nfd_b); |
| @@ -245,17 +240,14 @@ pub fn canonCaselessMatch( | |||
| 245 | test "canonCaselessMatch" { | 240 | test "canonCaselessMatch" { |
| 246 | const allocator = testing.allocator; | 241 | const allocator = testing.allocator; |
| 247 | 242 | ||
| 248 | var normalize = try Normalize.init(allocator); | 243 | try testing.expect(try canonCaselessMatch(allocator, "ascii only!", "ASCII Only!")); |
| 249 | defer normalize.deinit(allocator); | ||
| 250 | |||
| 251 | try testing.expect(try canonCaselessMatch(allocator, normalize, "ascii only!", "ASCII Only!")); | ||
| 252 | 244 | ||
| 253 | const a = "Héllo World! \u{3d3}"; | 245 | const a = "Héllo World! \u{3d3}"; |
| 254 | const b = "He\u{301}llo World! \u{3a5}\u{301}"; | 246 | const b = "He\u{301}llo World! \u{3a5}\u{301}"; |
| 255 | try testing.expect(!try canonCaselessMatch(allocator, normalize, a, b)); | 247 | try testing.expect(!try canonCaselessMatch(allocator, a, b)); |
| 256 | 248 | ||
| 257 | const c = "He\u{301}llo World! \u{3d2}\u{301}"; | 249 | const c = "He\u{301}llo World! \u{3d2}\u{301}"; |
| 258 | try testing.expect(try canonCaselessMatch(allocator, normalize, a, c)); | 250 | try testing.expect(try canonCaselessMatch(allocator, a, c)); |
| 259 | } | 251 | } |
| 260 | 252 | ||
| 261 | const std = @import("std"); | 253 | const std = @import("std"); |