diff options
| author | 2026-02-04 18:01:36 -0500 | |
|---|---|---|
| committer | 2026-02-04 18:01:36 -0500 | |
| commit | ba5d9081b479e95ffa7f3baf751beedd370cec14 (patch) | |
| tree | c12041d8aab9f9ff68b25a2e2c9042073c3d5f61 /src/CaseFolding.zig | |
| parent | Convert Words module to no-allocation (diff) | |
| download | zg-ba5d9081b479e95ffa7f3baf751beedd370cec14.tar.gz zg-ba5d9081b479e95ffa7f3baf751beedd370cec14.tar.xz zg-ba5d9081b479e95ffa7f3baf751beedd370cec14.zip | |
Normalization and case folding
Both of which deserve some further attention.
Diffstat (limited to 'src/CaseFolding.zig')
| -rw-r--r-- | src/CaseFolding.zig | 258 |
1 file changed, 100 insertions, 158 deletions
diff --git a/src/CaseFolding.zig b/src/CaseFolding.zig index df86b92..88f047c 100644 --- a/src/CaseFolding.zig +++ b/src/CaseFolding.zig | |||
| @@ -1,113 +1,53 @@ | |||
| 1 | cutoff: u21 = undefined, | ||
| 2 | cwcf_exceptions_min: u21 = undefined, | ||
| 3 | cwcf_exceptions_max: u21 = undefined, | ||
| 4 | cwcf_exceptions: []u21 = undefined, | ||
| 5 | multiple_start: u21 = undefined, | ||
| 6 | stage1: []u8 = undefined, | ||
| 7 | stage2: []u8 = undefined, | ||
| 8 | stage3: []i24 = undefined, | ||
| 9 | normalize: Normalize, | ||
| 10 | owns_normalize: bool, | ||
| 11 | |||
| 12 | const CaseFolding = @This(); | 1 | const CaseFolding = @This(); |
| 13 | 2 | ||
| 14 | pub fn init(allocator: Allocator) Allocator.Error!CaseFolding { | 3 | const Data = struct { |
| 15 | var case_fold: CaseFolding = undefined; | 4 | cutoff: u21 = undefined, |
| 16 | try case_fold.setup(allocator); | 5 | cwcf_exceptions_min: u21 = undefined, |
| 17 | return case_fold; | 6 | cwcf_exceptions_max: u21 = undefined, |
| 18 | } | 7 | cwcf_exceptions: []const u21 = undefined, |
| 19 | 8 | multiple_start: u21 = undefined, | |
| 20 | pub fn initWithNormalize(allocator: Allocator, norm: Normalize) Allocator.Error!CaseFolding { | 9 | stage1: []const u8 = undefined, |
| 21 | var casefold: CaseFolding = undefined; | 10 | stage2: []const u8 = undefined, |
| 22 | try casefold.setupWithNormalize(allocator, norm); | 11 | stage3: []const i24 = undefined, |
| 23 | return casefold; | 12 | }; |
| 24 | } | 13 | |
| 25 | 14 | const casefold = casefold: { | |
| 26 | pub fn setup(casefold: *CaseFolding, allocator: Allocator) Allocator.Error!void { | 15 | const data = @import("fold"); |
| 27 | try casefold.setupImpl(allocator); | 16 | break :casefold Data{ |
| 28 | // Handle normalize memory separately during setup: | 17 | .cutoff = data.cutoff, |
| 29 | casefold.owns_normalize = false; | 18 | .multiple_start = data.multiple_start, |
| 30 | errdefer casefold.deinit(allocator); | 19 | .stage1 = &data.stage1, |
| 31 | try casefold.normalize.setup(allocator); | 20 | .stage2 = &data.stage2, |
| 32 | casefold.owns_normalize = true; | 21 | .stage3 = &data.stage3, |
| 33 | } | 22 | .cwcf_exceptions_min = data.cwcf_exceptions_min, |
| 34 | 23 | .cwcf_exceptions_max = data.cwcf_exceptions_max, | |
| 35 | pub fn setupWithNormalize(casefold: *CaseFolding, allocator: Allocator, norm: Normalize) !void { | 24 | .cwcf_exceptions = &data.cwcf_exceptions, |
| 36 | try casefold.setupImpl(allocator); | ||
| 37 | casefold.normalize = norm; | ||
| 38 | casefold.owns_normalize = false; | ||
| 39 | } | ||
| 40 | |||
| 41 | fn setupImpl(casefold: *CaseFolding, allocator: Allocator) Allocator.Error!void { | ||
| 42 | casefold.setupImplInner(allocator) catch |err| { | ||
| 43 | switch (err) { | ||
| 44 | error.OutOfMemory => |e| return e, | ||
| 45 | else => unreachable, | ||
| 46 | } | ||
| 47 | }; | 25 | }; |
| 48 | } | 26 | }; |
| 49 | |||
| 50 | inline fn setupImplInner(casefold: *CaseFolding, allocator: Allocator) !void { | ||
| 51 | const in_bytes = @embedFile("fold"); | ||
| 52 | var in_fbs = std.io.fixedBufferStream(in_bytes); | ||
| 53 | var reader = in_fbs.reader(); | ||
| 54 | |||
| 55 | const endian = builtin.cpu.arch.endian(); | ||
| 56 | |||
| 57 | casefold.cutoff = @intCast(try reader.readInt(u24, endian)); | ||
| 58 | casefold.multiple_start = @intCast(try reader.readInt(u24, endian)); | ||
| 59 | |||
| 60 | var len = try reader.readInt(u16, endian); | ||
| 61 | casefold.stage1 = try allocator.alloc(u8, len); | ||
| 62 | errdefer allocator.free(casefold.stage1); | ||
| 63 | for (0..len) |i| casefold.stage1[i] = try reader.readInt(u8, endian); | ||
| 64 | |||
| 65 | len = try reader.readInt(u16, endian); | ||
| 66 | casefold.stage2 = try allocator.alloc(u8, len); | ||
| 67 | errdefer allocator.free(casefold.stage2); | ||
| 68 | for (0..len) |i| casefold.stage2[i] = try reader.readInt(u8, endian); | ||
| 69 | |||
| 70 | len = try reader.readInt(u16, endian); | ||
| 71 | casefold.stage3 = try allocator.alloc(i24, len); | ||
| 72 | errdefer allocator.free(casefold.stage3); | ||
| 73 | for (0..len) |i| casefold.stage3[i] = try reader.readInt(i24, endian); | ||
| 74 | |||
| 75 | casefold.cwcf_exceptions_min = @intCast(try reader.readInt(u24, endian)); | ||
| 76 | casefold.cwcf_exceptions_max = @intCast(try reader.readInt(u24, endian)); | ||
| 77 | len = try reader.readInt(u16, endian); | ||
| 78 | casefold.cwcf_exceptions = try allocator.alloc(u21, len); | ||
| 79 | errdefer allocator.free(casefold.cwcf_exceptions); | ||
| 80 | for (0..len) |i| casefold.cwcf_exceptions[i] = @intCast(try reader.readInt(u24, endian)); | ||
| 81 | } | ||
| 82 | |||
| 83 | pub fn deinit(fdata: *const CaseFolding, allocator: mem.Allocator) void { | ||
| 84 | allocator.free(fdata.stage1); | ||
| 85 | allocator.free(fdata.stage2); | ||
| 86 | allocator.free(fdata.stage3); | ||
| 87 | allocator.free(fdata.cwcf_exceptions); | ||
| 88 | if (fdata.owns_normalize) fdata.normalize.deinit(allocator); | ||
| 89 | } | ||
| 90 | 27 | ||
| 91 | /// Returns the case fold for `cp`. | 28 | /// Returns the case fold for `cp`. |
| 92 | pub fn caseFold(fdata: *const CaseFolding, cp: u21, buf: []u21) []const u21 { | 29 | pub fn caseFold(cp: u21, buf: []u21) []const u21 { |
| 93 | if (cp >= fdata.cutoff) return &.{}; | 30 | // Unmatched code points fold to themselves, so we default to this. |
| 31 | buf[0] = cp; | ||
| 94 | 32 | ||
| 95 | const stage1_val = fdata.stage1[cp >> 8]; | 33 | if (cp >= casefold.cutoff) return buf[0..1]; |
| 96 | if (stage1_val == 0) return &.{}; | 34 | |
| 35 | const stage1_val = casefold.stage1[cp >> 8]; | ||
| 36 | if (stage1_val == 0) return buf[0..1]; | ||
| 97 | 37 | ||
| 98 | const stage2_index = @as(usize, stage1_val) * 256 + (cp & 0xFF); | 38 | const stage2_index = @as(usize, stage1_val) * 256 + (cp & 0xFF); |
| 99 | const stage3_index = fdata.stage2[stage2_index]; | 39 | const stage3_index = casefold.stage2[stage2_index]; |
| 100 | 40 | ||
| 101 | if (stage3_index & 0x80 != 0) { | 41 | if (stage3_index & 0x80 != 0) { |
| 102 | const real_index = @as(usize, fdata.multiple_start) + (stage3_index ^ 0x80) * 3; | 42 | const real_index = @as(usize, casefold.multiple_start) + (stage3_index ^ 0x80) * 3; |
| 103 | const mapping = mem.sliceTo(fdata.stage3[real_index..][0..3], 0); | 43 | const mapping = mem.sliceTo(casefold.stage3[real_index..][0..3], 0); |
| 104 | for (mapping, 0..) |c, i| buf[i] = @intCast(c); | 44 | for (mapping, 0..) |c, i| buf[i] = @intCast(c); |
| 105 | 45 | ||
| 106 | return buf[0..mapping.len]; | 46 | return buf[0..mapping.len]; |
| 107 | } | 47 | } |
| 108 | 48 | ||
| 109 | const offset = fdata.stage3[stage3_index]; | 49 | const offset = casefold.stage3[stage3_index]; |
| 110 | if (offset == 0) return &.{}; | 50 | if (offset == 0) return buf[0..1]; |
| 111 | 51 | ||
| 112 | buf[0] = @intCast(@as(i32, cp) + offset); | 52 | buf[0] = @intCast(@as(i32, cp) + offset); |
| 113 | 53 | ||
| @@ -117,7 +57,6 @@ pub fn caseFold(fdata: *const CaseFolding, cp: u21, buf: []u21) []const u21 { | |||
| 117 | /// Produces the case folded code points for `cps`. Caller must free returned | 57 | /// Produces the case folded code points for `cps`. Caller must free returned |
| 118 | /// slice with `allocator`. | 58 | /// slice with `allocator`. |
| 119 | pub fn caseFoldAlloc( | 59 | pub fn caseFoldAlloc( |
| 120 | casefold: *const CaseFolding, | ||
| 121 | allocator: Allocator, | 60 | allocator: Allocator, |
| 122 | cps: []const u21, | 61 | cps: []const u21, |
| 123 | ) Allocator.Error![]const u21 { | 62 | ) Allocator.Error![]const u21 { |
| @@ -126,7 +65,7 @@ pub fn caseFoldAlloc( | |||
| 126 | var buf: [3]u21 = undefined; | 65 | var buf: [3]u21 = undefined; |
| 127 | 66 | ||
| 128 | for (cps) |cp| { | 67 | for (cps) |cp| { |
| 129 | const cf = casefold.caseFold(cp, &buf); | 68 | const cf = CaseFolding.caseFold(cp, &buf); |
| 130 | 69 | ||
| 131 | if (cf.len == 0) { | 70 | if (cf.len == 0) { |
| 132 | try cfcps.append(cp); | 71 | try cfcps.append(cp); |
| @@ -139,19 +78,19 @@ pub fn caseFoldAlloc( | |||
| 139 | } | 78 | } |
| 140 | 79 | ||
| 141 | /// Returns true when caseFold(NFD(`cp`)) != NFD(`cp`). | 80 | /// Returns true when caseFold(NFD(`cp`)) != NFD(`cp`). |
| 142 | pub fn cpChangesWhenCaseFolded(casefold: *const CaseFolding, cp: u21) bool { | 81 | pub fn cpChangesWhenCaseFolded(cp: u21) bool { |
| 143 | var buf: [3]u21 = undefined; | 82 | var buf: [3]u21 = undefined; |
| 144 | const has_mapping = casefold.caseFold(cp, &buf).len != 0; | 83 | const has_mapping = CaseFolding.caseFold(cp, &buf).len != 0; |
| 145 | return has_mapping and !casefold.isCwcfException(cp); | 84 | return has_mapping and !CaseFolding.isCwcfException(cp); |
| 146 | } | 85 | } |
| 147 | 86 | ||
| 148 | pub fn changesWhenCaseFolded(casefold: *const CaseFolding, cps: []const u21) bool { | 87 | pub fn changesWhenCaseFolded(cps: []const u21) bool { |
| 149 | return for (cps) |cp| { | 88 | return for (cps) |cp| { |
| 150 | if (casefold.cpChangesWhenCaseFolded(cp)) break true; | 89 | if (CaseFolding.cpChangesWhenCaseFolded(cp)) break true; |
| 151 | } else false; | 90 | } else false; |
| 152 | } | 91 | } |
| 153 | 92 | ||
| 154 | fn isCwcfException(casefold: *const CaseFolding, cp: u21) bool { | 93 | fn isCwcfException(cp: u21) bool { |
| 155 | return cp >= casefold.cwcf_exceptions_min and | 94 | return cp >= casefold.cwcf_exceptions_min and |
| 156 | cp <= casefold.cwcf_exceptions_max and | 95 | cp <= casefold.cwcf_exceptions_max and |
| 157 | std.mem.indexOfScalar(u21, casefold.cwcf_exceptions, cp) != null; | 96 | std.mem.indexOfScalar(u21, casefold.cwcf_exceptions, cp) != null; |
| @@ -160,88 +99,114 @@ fn isCwcfException(casefold: *const CaseFolding, cp: u21) bool { | |||
| 160 | /// Caseless compare `a` and `b` by decomposing to NFKD. This is the most | 99 | /// Caseless compare `a` and `b` by decomposing to NFKD. This is the most |
| 161 | /// comprehensive comparison possible, but slower than `canonCaselessMatch`. | 100 | /// comprehensive comparison possible, but slower than `canonCaselessMatch`. |
| 162 | pub fn compatCaselessMatch( | 101 | pub fn compatCaselessMatch( |
| 163 | casefold: *const CaseFolding, | ||
| 164 | allocator: Allocator, | 102 | allocator: Allocator, |
| 103 | normalize: Normalize, | ||
| 165 | a: []const u8, | 104 | a: []const u8, |
| 166 | b: []const u8, | 105 | b: []const u8, |
| 167 | ) Allocator.Error!bool { | 106 | ) Allocator.Error!bool { |
| 168 | if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); | 107 | if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); |
| 169 | 108 | ||
| 170 | // Process a | 109 | // Process a |
| 171 | const nfd_a = try casefold.normalize.nfxdCodePoints(allocator, a, .nfd); | 110 | const nfd_a = try normalize.nfxdCodePoints(allocator, a, .nfd); |
| 172 | defer allocator.free(nfd_a); | 111 | defer allocator.free(nfd_a); |
| 173 | 112 | ||
| 174 | var need_free_cf_nfd_a = false; | 113 | var need_free_cf_nfd_a = false; |
| 175 | var cf_nfd_a: []const u21 = nfd_a; | 114 | var cf_nfd_a: []const u21 = nfd_a; |
| 176 | if (casefold.changesWhenCaseFolded(nfd_a)) { | 115 | if (CaseFolding.changesWhenCaseFolded(nfd_a)) { |
| 177 | cf_nfd_a = try casefold.caseFoldAlloc(allocator, nfd_a); | 116 | cf_nfd_a = try CaseFolding.caseFoldAlloc(allocator, nfd_a); |
| 178 | need_free_cf_nfd_a = true; | 117 | need_free_cf_nfd_a = true; |
| 179 | } | 118 | } |
| 180 | defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a); | 119 | defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a); |
| 181 | 120 | ||
| 182 | const nfkd_cf_nfd_a = try casefold.normalize.nfkdCodePoints(allocator, cf_nfd_a); | 121 | const nfkd_cf_nfd_a = try normalize.nfkdCodePoints(allocator, cf_nfd_a); |
| 183 | defer allocator.free(nfkd_cf_nfd_a); | 122 | defer allocator.free(nfkd_cf_nfd_a); |
| 184 | const cf_nfkd_cf_nfd_a = try casefold.caseFoldAlloc(allocator, nfkd_cf_nfd_a); | 123 | const cf_nfkd_cf_nfd_a = try CaseFolding.caseFoldAlloc(allocator, nfkd_cf_nfd_a); |
| 185 | defer allocator.free(cf_nfkd_cf_nfd_a); | 124 | defer allocator.free(cf_nfkd_cf_nfd_a); |
| 186 | const nfkd_cf_nfkd_cf_nfd_a = try casefold.normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_a); | 125 | const nfkd_cf_nfkd_cf_nfd_a = try normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_a); |
| 187 | defer allocator.free(nfkd_cf_nfkd_cf_nfd_a); | 126 | defer allocator.free(nfkd_cf_nfkd_cf_nfd_a); |
| 188 | 127 | ||
| 189 | // Process b | 128 | // Process b |
| 190 | const nfd_b = try casefold.normalize.nfxdCodePoints(allocator, b, .nfd); | 129 | const nfd_b = try normalize.nfxdCodePoints(allocator, b, .nfd); |
| 191 | defer allocator.free(nfd_b); | 130 | defer allocator.free(nfd_b); |
| 192 | 131 | ||
| 193 | var need_free_cf_nfd_b = false; | 132 | var need_free_cf_nfd_b = false; |
| 194 | var cf_nfd_b: []const u21 = nfd_b; | 133 | var cf_nfd_b: []const u21 = nfd_b; |
| 195 | if (casefold.changesWhenCaseFolded(nfd_b)) { | 134 | if (CaseFolding.changesWhenCaseFolded(nfd_b)) { |
| 196 | cf_nfd_b = try casefold.caseFoldAlloc(allocator, nfd_b); | 135 | cf_nfd_b = try CaseFolding.caseFoldAlloc(allocator, nfd_b); |
| 197 | need_free_cf_nfd_b = true; | 136 | need_free_cf_nfd_b = true; |
| 198 | } | 137 | } |
| 199 | defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b); | 138 | defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b); |
| 200 | 139 | ||
| 201 | const nfkd_cf_nfd_b = try casefold.normalize.nfkdCodePoints(allocator, cf_nfd_b); | 140 | const nfkd_cf_nfd_b = try normalize.nfkdCodePoints(allocator, cf_nfd_b); |
| 202 | defer allocator.free(nfkd_cf_nfd_b); | 141 | defer allocator.free(nfkd_cf_nfd_b); |
| 203 | const cf_nfkd_cf_nfd_b = try casefold.caseFoldAlloc(allocator, nfkd_cf_nfd_b); | 142 | const cf_nfkd_cf_nfd_b = try CaseFolding.caseFoldAlloc(allocator, nfkd_cf_nfd_b); |
| 204 | defer allocator.free(cf_nfkd_cf_nfd_b); | 143 | defer allocator.free(cf_nfkd_cf_nfd_b); |
| 205 | const nfkd_cf_nfkd_cf_nfd_b = try casefold.normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_b); | 144 | const nfkd_cf_nfkd_cf_nfd_b = try normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_b); |
| 206 | defer allocator.free(nfkd_cf_nfkd_cf_nfd_b); | 145 | defer allocator.free(nfkd_cf_nfkd_cf_nfd_b); |
| 207 | 146 | ||
| 208 | return mem.eql(u21, nfkd_cf_nfkd_cf_nfd_a, nfkd_cf_nfkd_cf_nfd_b); | 147 | return mem.eql(u21, nfkd_cf_nfkd_cf_nfd_a, nfkd_cf_nfkd_cf_nfd_b); |
| 209 | } | 148 | } |
| 210 | 149 | ||
| 150 | test "caseFold" { | ||
| 151 | var buf: [3]u21 = undefined; | ||
| 152 | |||
| 153 | // Folds '1' to '1' | ||
| 154 | try testing.expectEqual(1, caseFold('1', &buf).len); | ||
| 155 | try testing.expectEqual('1', caseFold('1', &buf)[0]); | ||
| 156 | |||
| 157 | // Folds '2' to '2' | ||
| 158 | try testing.expectEqual(1, caseFold('2', &buf).len); | ||
| 159 | try testing.expectEqual('2', caseFold('2', &buf)[0]); | ||
| 160 | |||
| 161 | // Folds Armenian capital letter 'Zhe' (U+053A) | ||
| 162 | try testing.expectEqual(1, caseFold('Ժ', &buf).len); | ||
| 163 | // Armenian small letter 'Zhe' (U+056A) | ||
| 164 | try testing.expectEqual('ժ', caseFold('Ժ', &buf)[0]); | ||
| 165 | |||
| 166 | // Folds Greek small letter Upsilon with Dialytika and Perispomeni (U+1FE7) | ||
| 167 | try testing.expectEqual(3, caseFold('ῧ', &buf).len); | ||
| 168 | // Greek small letter Upsilon (U+03C5) | ||
| 169 | try testing.expectEqual('υ', caseFold('ῧ', &buf)[0]); | ||
| 170 | // Combining Diaeresis | ||
| 171 | try testing.expectEqual('\u{0308}', caseFold('ῧ', &buf)[1]); | ||
| 172 | // Combining Greek Perispomeni | ||
| 173 | try testing.expectEqual('\u{0342}', caseFold('ῧ', &buf)[2]); | ||
| 174 | } | ||
| 175 | |||
| 211 | test "compatCaselessMatch" { | 176 | test "compatCaselessMatch" { |
| 212 | const allocator = testing.allocator; | 177 | const allocator = testing.allocator; |
| 213 | 178 | ||
| 214 | const caser = try CaseFolding.init(allocator); | 179 | var normalize = try Normalize.init(allocator); |
| 215 | defer caser.deinit(allocator); | 180 | defer normalize.deinit(allocator); |
| 216 | 181 | ||
| 217 | try testing.expect(try caser.compatCaselessMatch(allocator, "ascii only!", "ASCII Only!")); | 182 | try testing.expect(try compatCaselessMatch(allocator, normalize, "ascii only!", "ASCII Only!")); |
| 218 | 183 | ||
| 219 | const a = "Héllo World! \u{3d3}"; | 184 | const a = "Héllo World! \u{3d3}"; |
| 220 | const b = "He\u{301}llo World! \u{3a5}\u{301}"; | 185 | const b = "He\u{301}llo World! \u{3a5}\u{301}"; |
| 221 | try testing.expect(try caser.compatCaselessMatch(allocator, a, b)); | 186 | try testing.expect(try compatCaselessMatch(allocator, normalize, a, b)); |
| 222 | 187 | ||
| 223 | const c = "He\u{301}llo World! \u{3d2}\u{301}"; | 188 | const c = "He\u{301}llo World! \u{3d2}\u{301}"; |
| 224 | try testing.expect(try caser.compatCaselessMatch(allocator, a, c)); | 189 | try testing.expect(try compatCaselessMatch(allocator, normalize, a, c)); |
| 225 | } | 190 | } |
| 226 | 191 | ||
| 227 | /// Performs canonical caseless string matching by decomposing to NFD. This is | 192 | /// Performs canonical caseless string matching by decomposing to NFD. This is |
| 228 | /// faster than `compatCaselessMatch`, but less comprehensive. | 193 | /// faster than `compatCaselessMatch`, but less comprehensive. |
| 229 | pub fn canonCaselessMatch( | 194 | pub fn canonCaselessMatch( |
| 230 | casefold: *const CaseFolding, | ||
| 231 | allocator: Allocator, | 195 | allocator: Allocator, |
| 196 | normalize: Normalize, | ||
| 232 | a: []const u8, | 197 | a: []const u8, |
| 233 | b: []const u8, | 198 | b: []const u8, |
| 234 | ) Allocator.Error!bool { | 199 | ) Allocator.Error!bool { |
| 235 | if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); | 200 | if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); |
| 236 | 201 | ||
| 237 | // Process a | 202 | // Process a |
| 238 | const nfd_a = try casefold.normalize.nfxdCodePoints(allocator, a, .nfd); | 203 | const nfd_a = try normalize.nfxdCodePoints(allocator, a, .nfd); |
| 239 | defer allocator.free(nfd_a); | 204 | defer allocator.free(nfd_a); |
| 240 | 205 | ||
| 241 | var need_free_cf_nfd_a = false; | 206 | var need_free_cf_nfd_a = false; |
| 242 | var cf_nfd_a: []const u21 = nfd_a; | 207 | var cf_nfd_a: []const u21 = nfd_a; |
| 243 | if (casefold.changesWhenCaseFolded(nfd_a)) { | 208 | if (CaseFolding.changesWhenCaseFolded(nfd_a)) { |
| 244 | cf_nfd_a = try casefold.caseFoldAlloc(allocator, nfd_a); | 209 | cf_nfd_a = try CaseFolding.caseFoldAlloc(allocator, nfd_a); |
| 245 | need_free_cf_nfd_a = true; | 210 | need_free_cf_nfd_a = true; |
| 246 | } | 211 | } |
| 247 | defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a); | 212 | defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a); |
| @@ -249,19 +214,19 @@ pub fn canonCaselessMatch( | |||
| 249 | var need_free_nfd_cf_nfd_a = false; | 214 | var need_free_nfd_cf_nfd_a = false; |
| 250 | var nfd_cf_nfd_a = cf_nfd_a; | 215 | var nfd_cf_nfd_a = cf_nfd_a; |
| 251 | if (!need_free_cf_nfd_a) { | 216 | if (!need_free_cf_nfd_a) { |
| 252 | nfd_cf_nfd_a = try casefold.normalize.nfdCodePoints(allocator, cf_nfd_a); | 217 | nfd_cf_nfd_a = try normalize.nfdCodePoints(allocator, cf_nfd_a); |
| 253 | need_free_nfd_cf_nfd_a = true; | 218 | need_free_nfd_cf_nfd_a = true; |
| 254 | } | 219 | } |
| 255 | defer if (need_free_nfd_cf_nfd_a) allocator.free(nfd_cf_nfd_a); | 220 | defer if (need_free_nfd_cf_nfd_a) allocator.free(nfd_cf_nfd_a); |
| 256 | 221 | ||
| 257 | // Process b | 222 | // Process b |
| 258 | const nfd_b = try casefold.normalize.nfxdCodePoints(allocator, b, .nfd); | 223 | const nfd_b = try normalize.nfxdCodePoints(allocator, b, .nfd); |
| 259 | defer allocator.free(nfd_b); | 224 | defer allocator.free(nfd_b); |
| 260 | 225 | ||
| 261 | var need_free_cf_nfd_b = false; | 226 | var need_free_cf_nfd_b = false; |
| 262 | var cf_nfd_b: []const u21 = nfd_b; | 227 | var cf_nfd_b: []const u21 = nfd_b; |
| 263 | if (casefold.changesWhenCaseFolded(nfd_b)) { | 228 | if (CaseFolding.changesWhenCaseFolded(nfd_b)) { |
| 264 | cf_nfd_b = try casefold.caseFoldAlloc(allocator, nfd_b); | 229 | cf_nfd_b = try CaseFolding.caseFoldAlloc(allocator, nfd_b); |
| 265 | need_free_cf_nfd_b = true; | 230 | need_free_cf_nfd_b = true; |
| 266 | } | 231 | } |
| 267 | defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b); | 232 | defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b); |
| @@ -269,7 +234,7 @@ pub fn canonCaselessMatch( | |||
| 269 | var need_free_nfd_cf_nfd_b = false; | 234 | var need_free_nfd_cf_nfd_b = false; |
| 270 | var nfd_cf_nfd_b = cf_nfd_b; | 235 | var nfd_cf_nfd_b = cf_nfd_b; |
| 271 | if (!need_free_cf_nfd_b) { | 236 | if (!need_free_cf_nfd_b) { |
| 272 | nfd_cf_nfd_b = try casefold.normalize.nfdCodePoints(allocator, cf_nfd_b); | 237 | nfd_cf_nfd_b = try normalize.nfdCodePoints(allocator, cf_nfd_b); |
| 273 | need_free_nfd_cf_nfd_b = true; | 238 | need_free_nfd_cf_nfd_b = true; |
| 274 | } | 239 | } |
| 275 | defer if (need_free_nfd_cf_nfd_b) allocator.free(nfd_cf_nfd_b); | 240 | defer if (need_free_nfd_cf_nfd_b) allocator.free(nfd_cf_nfd_b); |
| @@ -280,40 +245,17 @@ pub fn canonCaselessMatch( | |||
| 280 | test "canonCaselessMatch" { | 245 | test "canonCaselessMatch" { |
| 281 | const allocator = testing.allocator; | 246 | const allocator = testing.allocator; |
| 282 | 247 | ||
| 283 | const caser = try CaseFolding.init(allocator); | 248 | var normalize = try Normalize.init(allocator); |
| 284 | defer caser.deinit(allocator); | 249 | defer normalize.deinit(allocator); |
| 285 | 250 | ||
| 286 | try testing.expect(try caser.canonCaselessMatch(allocator, "ascii only!", "ASCII Only!")); | 251 | try testing.expect(try canonCaselessMatch(allocator, normalize, "ascii only!", "ASCII Only!")); |
| 287 | 252 | ||
| 288 | const a = "Héllo World! \u{3d3}"; | 253 | const a = "Héllo World! \u{3d3}"; |
| 289 | const b = "He\u{301}llo World! \u{3a5}\u{301}"; | 254 | const b = "He\u{301}llo World! \u{3a5}\u{301}"; |
| 290 | try testing.expect(!try caser.canonCaselessMatch(allocator, a, b)); | 255 | try testing.expect(!try canonCaselessMatch(allocator, normalize, a, b)); |
| 291 | 256 | ||
| 292 | const c = "He\u{301}llo World! \u{3d2}\u{301}"; | 257 | const c = "He\u{301}llo World! \u{3d2}\u{301}"; |
| 293 | try testing.expect(try caser.canonCaselessMatch(allocator, a, c)); | 258 | try testing.expect(try canonCaselessMatch(allocator, normalize, a, c)); |
| 294 | } | ||
| 295 | |||
| 296 | fn testAllocations(allocator: Allocator) !void { | ||
| 297 | // With normalize provided | ||
| 298 | { | ||
| 299 | const normalize = try Normalize.init(allocator); | ||
| 300 | defer normalize.deinit(allocator); | ||
| 301 | const caser = try CaseFolding.initWithNormalize(allocator, normalize); | ||
| 302 | defer caser.deinit(allocator); | ||
| 303 | } | ||
| 304 | // With normalize owned | ||
| 305 | { | ||
| 306 | const caser = try CaseFolding.init(allocator); | ||
| 307 | defer caser.deinit(allocator); | ||
| 308 | } | ||
| 309 | } | ||
| 310 | |||
| 311 | test "Allocation Failures" { | ||
| 312 | try testing.checkAllAllocationFailures( | ||
| 313 | testing.allocator, | ||
| 314 | testAllocations, | ||
| 315 | .{}, | ||
| 316 | ); | ||
| 317 | } | 259 | } |
| 318 | 260 | ||
| 319 | const std = @import("std"); | 261 | const std = @import("std"); |