summary refs log tree commit diff
path: root/src/CaseFolding.zig
diff options
context:
space:
mode:
author Sam Atman 2026-02-04 18:01:36 -0500
committer Sam Atman 2026-02-04 18:01:36 -0500
commit ba5d9081b479e95ffa7f3baf751beedd370cec14 (patch)
tree c12041d8aab9f9ff68b25a2e2c9042073c3d5f61 /src/CaseFolding.zig
parent Convert Words module to no-allocation (diff)
download zg-ba5d9081b479e95ffa7f3baf751beedd370cec14.tar.gz
zg-ba5d9081b479e95ffa7f3baf751beedd370cec14.tar.xz
zg-ba5d9081b479e95ffa7f3baf751beedd370cec14.zip
Normalization and case folding
Both of which deserve some further attention.
Diffstat (limited to 'src/CaseFolding.zig')
-rw-r--r-- src/CaseFolding.zig 258
1 files changed, 100 insertions, 158 deletions
diff --git a/src/CaseFolding.zig b/src/CaseFolding.zig
index df86b92..88f047c 100644
--- a/src/CaseFolding.zig
+++ b/src/CaseFolding.zig
@@ -1,113 +1,53 @@
1cutoff: u21 = undefined,
2cwcf_exceptions_min: u21 = undefined,
3cwcf_exceptions_max: u21 = undefined,
4cwcf_exceptions: []u21 = undefined,
5multiple_start: u21 = undefined,
6stage1: []u8 = undefined,
7stage2: []u8 = undefined,
8stage3: []i24 = undefined,
9normalize: Normalize,
10owns_normalize: bool,
11
12const CaseFolding = @This(); 1const CaseFolding = @This();
13 2
14pub fn init(allocator: Allocator) Allocator.Error!CaseFolding { 3const Data = struct {
15 var case_fold: CaseFolding = undefined; 4 cutoff: u21 = undefined,
16 try case_fold.setup(allocator); 5 cwcf_exceptions_min: u21 = undefined,
17 return case_fold; 6 cwcf_exceptions_max: u21 = undefined,
18} 7 cwcf_exceptions: []const u21 = undefined,
19 8 multiple_start: u21 = undefined,
20pub fn initWithNormalize(allocator: Allocator, norm: Normalize) Allocator.Error!CaseFolding { 9 stage1: []const u8 = undefined,
21 var casefold: CaseFolding = undefined; 10 stage2: []const u8 = undefined,
22 try casefold.setupWithNormalize(allocator, norm); 11 stage3: []const i24 = undefined,
23 return casefold; 12};
24} 13
25 14const casefold = casefold: {
26pub fn setup(casefold: *CaseFolding, allocator: Allocator) Allocator.Error!void { 15 const data = @import("fold");
27 try casefold.setupImpl(allocator); 16 break :casefold Data{
28 // Handle normalize memory separately during setup: 17 .cutoff = data.cutoff,
29 casefold.owns_normalize = false; 18 .multiple_start = data.multiple_start,
30 errdefer casefold.deinit(allocator); 19 .stage1 = &data.stage1,
31 try casefold.normalize.setup(allocator); 20 .stage2 = &data.stage2,
32 casefold.owns_normalize = true; 21 .stage3 = &data.stage3,
33} 22 .cwcf_exceptions_min = data.cwcf_exceptions_min,
34 23 .cwcf_exceptions_max = data.cwcf_exceptions_max,
35pub fn setupWithNormalize(casefold: *CaseFolding, allocator: Allocator, norm: Normalize) !void { 24 .cwcf_exceptions = &data.cwcf_exceptions,
36 try casefold.setupImpl(allocator);
37 casefold.normalize = norm;
38 casefold.owns_normalize = false;
39}
40
41fn setupImpl(casefold: *CaseFolding, allocator: Allocator) Allocator.Error!void {
42 casefold.setupImplInner(allocator) catch |err| {
43 switch (err) {
44 error.OutOfMemory => |e| return e,
45 else => unreachable,
46 }
47 }; 25 };
48} 26};
49
50inline fn setupImplInner(casefold: *CaseFolding, allocator: Allocator) !void {
51 const in_bytes = @embedFile("fold");
52 var in_fbs = std.io.fixedBufferStream(in_bytes);
53 var reader = in_fbs.reader();
54
55 const endian = builtin.cpu.arch.endian();
56
57 casefold.cutoff = @intCast(try reader.readInt(u24, endian));
58 casefold.multiple_start = @intCast(try reader.readInt(u24, endian));
59
60 var len = try reader.readInt(u16, endian);
61 casefold.stage1 = try allocator.alloc(u8, len);
62 errdefer allocator.free(casefold.stage1);
63 for (0..len) |i| casefold.stage1[i] = try reader.readInt(u8, endian);
64
65 len = try reader.readInt(u16, endian);
66 casefold.stage2 = try allocator.alloc(u8, len);
67 errdefer allocator.free(casefold.stage2);
68 for (0..len) |i| casefold.stage2[i] = try reader.readInt(u8, endian);
69
70 len = try reader.readInt(u16, endian);
71 casefold.stage3 = try allocator.alloc(i24, len);
72 errdefer allocator.free(casefold.stage3);
73 for (0..len) |i| casefold.stage3[i] = try reader.readInt(i24, endian);
74
75 casefold.cwcf_exceptions_min = @intCast(try reader.readInt(u24, endian));
76 casefold.cwcf_exceptions_max = @intCast(try reader.readInt(u24, endian));
77 len = try reader.readInt(u16, endian);
78 casefold.cwcf_exceptions = try allocator.alloc(u21, len);
79 errdefer allocator.free(casefold.cwcf_exceptions);
80 for (0..len) |i| casefold.cwcf_exceptions[i] = @intCast(try reader.readInt(u24, endian));
81}
82
83pub fn deinit(fdata: *const CaseFolding, allocator: mem.Allocator) void {
84 allocator.free(fdata.stage1);
85 allocator.free(fdata.stage2);
86 allocator.free(fdata.stage3);
87 allocator.free(fdata.cwcf_exceptions);
88 if (fdata.owns_normalize) fdata.normalize.deinit(allocator);
89}
90 27
91/// Returns the case fold for `cp`. 28/// Returns the case fold for `cp`.
92pub fn caseFold(fdata: *const CaseFolding, cp: u21, buf: []u21) []const u21 { 29pub fn caseFold(cp: u21, buf: []u21) []const u21 {
93 if (cp >= fdata.cutoff) return &.{}; 30 // Unmatched code points fold to themselves, so we default to this.
31 buf[0] = cp;
94 32
95 const stage1_val = fdata.stage1[cp >> 8]; 33 if (cp >= casefold.cutoff) return buf[0..1];
96 if (stage1_val == 0) return &.{}; 34
35 const stage1_val = casefold.stage1[cp >> 8];
36 if (stage1_val == 0) return buf[0..1];
97 37
98 const stage2_index = @as(usize, stage1_val) * 256 + (cp & 0xFF); 38 const stage2_index = @as(usize, stage1_val) * 256 + (cp & 0xFF);
99 const stage3_index = fdata.stage2[stage2_index]; 39 const stage3_index = casefold.stage2[stage2_index];
100 40
101 if (stage3_index & 0x80 != 0) { 41 if (stage3_index & 0x80 != 0) {
102 const real_index = @as(usize, fdata.multiple_start) + (stage3_index ^ 0x80) * 3; 42 const real_index = @as(usize, casefold.multiple_start) + (stage3_index ^ 0x80) * 3;
103 const mapping = mem.sliceTo(fdata.stage3[real_index..][0..3], 0); 43 const mapping = mem.sliceTo(casefold.stage3[real_index..][0..3], 0);
104 for (mapping, 0..) |c, i| buf[i] = @intCast(c); 44 for (mapping, 0..) |c, i| buf[i] = @intCast(c);
105 45
106 return buf[0..mapping.len]; 46 return buf[0..mapping.len];
107 } 47 }
108 48
109 const offset = fdata.stage3[stage3_index]; 49 const offset = casefold.stage3[stage3_index];
110 if (offset == 0) return &.{}; 50 if (offset == 0) return buf[0..1];
111 51
112 buf[0] = @intCast(@as(i32, cp) + offset); 52 buf[0] = @intCast(@as(i32, cp) + offset);
113 53
@@ -117,7 +57,6 @@ pub fn caseFold(fdata: *const CaseFolding, cp: u21, buf: []u21) []const u21 {
117/// Produces the case folded code points for `cps`. Caller must free returned 57/// Produces the case folded code points for `cps`. Caller must free returned
118/// slice with `allocator`. 58/// slice with `allocator`.
119pub fn caseFoldAlloc( 59pub fn caseFoldAlloc(
120 casefold: *const CaseFolding,
121 allocator: Allocator, 60 allocator: Allocator,
122 cps: []const u21, 61 cps: []const u21,
123) Allocator.Error![]const u21 { 62) Allocator.Error![]const u21 {
@@ -126,7 +65,7 @@ pub fn caseFoldAlloc(
126 var buf: [3]u21 = undefined; 65 var buf: [3]u21 = undefined;
127 66
128 for (cps) |cp| { 67 for (cps) |cp| {
129 const cf = casefold.caseFold(cp, &buf); 68 const cf = CaseFolding.caseFold(cp, &buf);
130 69
131 if (cf.len == 0) { 70 if (cf.len == 0) {
132 try cfcps.append(cp); 71 try cfcps.append(cp);
@@ -139,19 +78,19 @@ pub fn caseFoldAlloc(
139} 78}
140 79
141/// Returns true when caseFold(NFD(`cp`)) != NFD(`cp`). 80/// Returns true when caseFold(NFD(`cp`)) != NFD(`cp`).
142pub fn cpChangesWhenCaseFolded(casefold: *const CaseFolding, cp: u21) bool { 81pub fn cpChangesWhenCaseFolded(cp: u21) bool {
143 var buf: [3]u21 = undefined; 82 var buf: [3]u21 = undefined;
144 const has_mapping = casefold.caseFold(cp, &buf).len != 0; 83 const has_mapping = CaseFolding.caseFold(cp, &buf).len != 0;
145 return has_mapping and !casefold.isCwcfException(cp); 84 return has_mapping and !CaseFolding.isCwcfException(cp);
146} 85}
147 86
148pub fn changesWhenCaseFolded(casefold: *const CaseFolding, cps: []const u21) bool { 87pub fn changesWhenCaseFolded(cps: []const u21) bool {
149 return for (cps) |cp| { 88 return for (cps) |cp| {
150 if (casefold.cpChangesWhenCaseFolded(cp)) break true; 89 if (CaseFolding.cpChangesWhenCaseFolded(cp)) break true;
151 } else false; 90 } else false;
152} 91}
153 92
154fn isCwcfException(casefold: *const CaseFolding, cp: u21) bool { 93fn isCwcfException(cp: u21) bool {
155 return cp >= casefold.cwcf_exceptions_min and 94 return cp >= casefold.cwcf_exceptions_min and
156 cp <= casefold.cwcf_exceptions_max and 95 cp <= casefold.cwcf_exceptions_max and
157 std.mem.indexOfScalar(u21, casefold.cwcf_exceptions, cp) != null; 96 std.mem.indexOfScalar(u21, casefold.cwcf_exceptions, cp) != null;
@@ -160,88 +99,114 @@ fn isCwcfException(casefold: *const CaseFolding, cp: u21) bool {
160/// Caseless compare `a` and `b` by decomposing to NFKD. This is the most 99/// Caseless compare `a` and `b` by decomposing to NFKD. This is the most
161/// comprehensive comparison possible, but slower than `canonCaselessMatch`. 100/// comprehensive comparison possible, but slower than `canonCaselessMatch`.
162pub fn compatCaselessMatch( 101pub fn compatCaselessMatch(
163 casefold: *const CaseFolding,
164 allocator: Allocator, 102 allocator: Allocator,
103 normalize: Normalize,
165 a: []const u8, 104 a: []const u8,
166 b: []const u8, 105 b: []const u8,
167) Allocator.Error!bool { 106) Allocator.Error!bool {
168 if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); 107 if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b);
169 108
170 // Process a 109 // Process a
171 const nfd_a = try casefold.normalize.nfxdCodePoints(allocator, a, .nfd); 110 const nfd_a = try normalize.nfxdCodePoints(allocator, a, .nfd);
172 defer allocator.free(nfd_a); 111 defer allocator.free(nfd_a);
173 112
174 var need_free_cf_nfd_a = false; 113 var need_free_cf_nfd_a = false;
175 var cf_nfd_a: []const u21 = nfd_a; 114 var cf_nfd_a: []const u21 = nfd_a;
176 if (casefold.changesWhenCaseFolded(nfd_a)) { 115 if (CaseFolding.changesWhenCaseFolded(nfd_a)) {
177 cf_nfd_a = try casefold.caseFoldAlloc(allocator, nfd_a); 116 cf_nfd_a = try CaseFolding.caseFoldAlloc(allocator, nfd_a);
178 need_free_cf_nfd_a = true; 117 need_free_cf_nfd_a = true;
179 } 118 }
180 defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a); 119 defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a);
181 120
182 const nfkd_cf_nfd_a = try casefold.normalize.nfkdCodePoints(allocator, cf_nfd_a); 121 const nfkd_cf_nfd_a = try normalize.nfkdCodePoints(allocator, cf_nfd_a);
183 defer allocator.free(nfkd_cf_nfd_a); 122 defer allocator.free(nfkd_cf_nfd_a);
184 const cf_nfkd_cf_nfd_a = try casefold.caseFoldAlloc(allocator, nfkd_cf_nfd_a); 123 const cf_nfkd_cf_nfd_a = try CaseFolding.caseFoldAlloc(allocator, nfkd_cf_nfd_a);
185 defer allocator.free(cf_nfkd_cf_nfd_a); 124 defer allocator.free(cf_nfkd_cf_nfd_a);
186 const nfkd_cf_nfkd_cf_nfd_a = try casefold.normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_a); 125 const nfkd_cf_nfkd_cf_nfd_a = try normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_a);
187 defer allocator.free(nfkd_cf_nfkd_cf_nfd_a); 126 defer allocator.free(nfkd_cf_nfkd_cf_nfd_a);
188 127
189 // Process b 128 // Process b
190 const nfd_b = try casefold.normalize.nfxdCodePoints(allocator, b, .nfd); 129 const nfd_b = try normalize.nfxdCodePoints(allocator, b, .nfd);
191 defer allocator.free(nfd_b); 130 defer allocator.free(nfd_b);
192 131
193 var need_free_cf_nfd_b = false; 132 var need_free_cf_nfd_b = false;
194 var cf_nfd_b: []const u21 = nfd_b; 133 var cf_nfd_b: []const u21 = nfd_b;
195 if (casefold.changesWhenCaseFolded(nfd_b)) { 134 if (CaseFolding.changesWhenCaseFolded(nfd_b)) {
196 cf_nfd_b = try casefold.caseFoldAlloc(allocator, nfd_b); 135 cf_nfd_b = try CaseFolding.caseFoldAlloc(allocator, nfd_b);
197 need_free_cf_nfd_b = true; 136 need_free_cf_nfd_b = true;
198 } 137 }
199 defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b); 138 defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b);
200 139
201 const nfkd_cf_nfd_b = try casefold.normalize.nfkdCodePoints(allocator, cf_nfd_b); 140 const nfkd_cf_nfd_b = try normalize.nfkdCodePoints(allocator, cf_nfd_b);
202 defer allocator.free(nfkd_cf_nfd_b); 141 defer allocator.free(nfkd_cf_nfd_b);
203 const cf_nfkd_cf_nfd_b = try casefold.caseFoldAlloc(allocator, nfkd_cf_nfd_b); 142 const cf_nfkd_cf_nfd_b = try CaseFolding.caseFoldAlloc(allocator, nfkd_cf_nfd_b);
204 defer allocator.free(cf_nfkd_cf_nfd_b); 143 defer allocator.free(cf_nfkd_cf_nfd_b);
205 const nfkd_cf_nfkd_cf_nfd_b = try casefold.normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_b); 144 const nfkd_cf_nfkd_cf_nfd_b = try normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_b);
206 defer allocator.free(nfkd_cf_nfkd_cf_nfd_b); 145 defer allocator.free(nfkd_cf_nfkd_cf_nfd_b);
207 146
208 return mem.eql(u21, nfkd_cf_nfkd_cf_nfd_a, nfkd_cf_nfkd_cf_nfd_b); 147 return mem.eql(u21, nfkd_cf_nfkd_cf_nfd_a, nfkd_cf_nfkd_cf_nfd_b);
209} 148}
210 149
150test "caseFold" {
151 var buf: [3]u21 = undefined;
152
153 // Folds '1' to '1'
154 try testing.expectEqual(1, caseFold('1', &buf).len);
155 try testing.expectEqual('1', caseFold('1', &buf)[0]);
156
157 // Folds '2' to '2'
158 try testing.expectEqual(1, caseFold('2', &buf).len);
159 try testing.expectEqual('2', caseFold('2', &buf)[0]);
160
161 // Folds Armenian capital letter 'Zhe' (U+053A)
162 try testing.expectEqual(1, caseFold('Ժ', &buf).len);
163 // Armenian small letter 'Zhe' (U+056A)
164 try testing.expectEqual('ժ', caseFold('Ժ', &buf)[0]);
165
166 // Folds Greek small letter Upsilon with Dialytika and Perispomeni (U+1FE7)
167 try testing.expectEqual(3, caseFold('ῧ', &buf).len);
168 // Greek small letter Upsilon (U+03C5)
169 try testing.expectEqual('υ', caseFold('ῧ', &buf)[0]);
170 // Combining Diaeresis
171 try testing.expectEqual('\u{0308}', caseFold('ῧ', &buf)[1]);
172 // Combining Greek Perispomeni
173 try testing.expectEqual('\u{0342}', caseFold('ῧ', &buf)[2]);
174}
175
211test "compatCaselessMatch" { 176test "compatCaselessMatch" {
212 const allocator = testing.allocator; 177 const allocator = testing.allocator;
213 178
214 const caser = try CaseFolding.init(allocator); 179 var normalize = try Normalize.init(allocator);
215 defer caser.deinit(allocator); 180 defer normalize.deinit(allocator);
216 181
217 try testing.expect(try caser.compatCaselessMatch(allocator, "ascii only!", "ASCII Only!")); 182 try testing.expect(try compatCaselessMatch(allocator, normalize, "ascii only!", "ASCII Only!"));
218 183
219 const a = "Héllo World! \u{3d3}"; 184 const a = "Héllo World! \u{3d3}";
220 const b = "He\u{301}llo World! \u{3a5}\u{301}"; 185 const b = "He\u{301}llo World! \u{3a5}\u{301}";
221 try testing.expect(try caser.compatCaselessMatch(allocator, a, b)); 186 try testing.expect(try compatCaselessMatch(allocator, normalize, a, b));
222 187
223 const c = "He\u{301}llo World! \u{3d2}\u{301}"; 188 const c = "He\u{301}llo World! \u{3d2}\u{301}";
224 try testing.expect(try caser.compatCaselessMatch(allocator, a, c)); 189 try testing.expect(try compatCaselessMatch(allocator, normalize, a, c));
225} 190}
226 191
227/// Performs canonical caseless string matching by decomposing to NFD. This is 192/// Performs canonical caseless string matching by decomposing to NFD. This is
228/// faster than `compatCaselessMatch`, but less comprehensive. 193/// faster than `compatCaselessMatch`, but less comprehensive.
229pub fn canonCaselessMatch( 194pub fn canonCaselessMatch(
230 casefold: *const CaseFolding,
231 allocator: Allocator, 195 allocator: Allocator,
196 normalize: Normalize,
232 a: []const u8, 197 a: []const u8,
233 b: []const u8, 198 b: []const u8,
234) Allocator.Error!bool { 199) Allocator.Error!bool {
235 if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); 200 if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b);
236 201
237 // Process a 202 // Process a
238 const nfd_a = try casefold.normalize.nfxdCodePoints(allocator, a, .nfd); 203 const nfd_a = try normalize.nfxdCodePoints(allocator, a, .nfd);
239 defer allocator.free(nfd_a); 204 defer allocator.free(nfd_a);
240 205
241 var need_free_cf_nfd_a = false; 206 var need_free_cf_nfd_a = false;
242 var cf_nfd_a: []const u21 = nfd_a; 207 var cf_nfd_a: []const u21 = nfd_a;
243 if (casefold.changesWhenCaseFolded(nfd_a)) { 208 if (CaseFolding.changesWhenCaseFolded(nfd_a)) {
244 cf_nfd_a = try casefold.caseFoldAlloc(allocator, nfd_a); 209 cf_nfd_a = try CaseFolding.caseFoldAlloc(allocator, nfd_a);
245 need_free_cf_nfd_a = true; 210 need_free_cf_nfd_a = true;
246 } 211 }
247 defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a); 212 defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a);
@@ -249,19 +214,19 @@ pub fn canonCaselessMatch(
249 var need_free_nfd_cf_nfd_a = false; 214 var need_free_nfd_cf_nfd_a = false;
250 var nfd_cf_nfd_a = cf_nfd_a; 215 var nfd_cf_nfd_a = cf_nfd_a;
251 if (!need_free_cf_nfd_a) { 216 if (!need_free_cf_nfd_a) {
252 nfd_cf_nfd_a = try casefold.normalize.nfdCodePoints(allocator, cf_nfd_a); 217 nfd_cf_nfd_a = try normalize.nfdCodePoints(allocator, cf_nfd_a);
253 need_free_nfd_cf_nfd_a = true; 218 need_free_nfd_cf_nfd_a = true;
254 } 219 }
255 defer if (need_free_nfd_cf_nfd_a) allocator.free(nfd_cf_nfd_a); 220 defer if (need_free_nfd_cf_nfd_a) allocator.free(nfd_cf_nfd_a);
256 221
257 // Process b 222 // Process b
258 const nfd_b = try casefold.normalize.nfxdCodePoints(allocator, b, .nfd); 223 const nfd_b = try normalize.nfxdCodePoints(allocator, b, .nfd);
259 defer allocator.free(nfd_b); 224 defer allocator.free(nfd_b);
260 225
261 var need_free_cf_nfd_b = false; 226 var need_free_cf_nfd_b = false;
262 var cf_nfd_b: []const u21 = nfd_b; 227 var cf_nfd_b: []const u21 = nfd_b;
263 if (casefold.changesWhenCaseFolded(nfd_b)) { 228 if (CaseFolding.changesWhenCaseFolded(nfd_b)) {
264 cf_nfd_b = try casefold.caseFoldAlloc(allocator, nfd_b); 229 cf_nfd_b = try CaseFolding.caseFoldAlloc(allocator, nfd_b);
265 need_free_cf_nfd_b = true; 230 need_free_cf_nfd_b = true;
266 } 231 }
267 defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b); 232 defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b);
@@ -269,7 +234,7 @@ pub fn canonCaselessMatch(
269 var need_free_nfd_cf_nfd_b = false; 234 var need_free_nfd_cf_nfd_b = false;
270 var nfd_cf_nfd_b = cf_nfd_b; 235 var nfd_cf_nfd_b = cf_nfd_b;
271 if (!need_free_cf_nfd_b) { 236 if (!need_free_cf_nfd_b) {
272 nfd_cf_nfd_b = try casefold.normalize.nfdCodePoints(allocator, cf_nfd_b); 237 nfd_cf_nfd_b = try normalize.nfdCodePoints(allocator, cf_nfd_b);
273 need_free_nfd_cf_nfd_b = true; 238 need_free_nfd_cf_nfd_b = true;
274 } 239 }
275 defer if (need_free_nfd_cf_nfd_b) allocator.free(nfd_cf_nfd_b); 240 defer if (need_free_nfd_cf_nfd_b) allocator.free(nfd_cf_nfd_b);
@@ -280,40 +245,17 @@ pub fn canonCaselessMatch(
280test "canonCaselessMatch" { 245test "canonCaselessMatch" {
281 const allocator = testing.allocator; 246 const allocator = testing.allocator;
282 247
283 const caser = try CaseFolding.init(allocator); 248 var normalize = try Normalize.init(allocator);
284 defer caser.deinit(allocator); 249 defer normalize.deinit(allocator);
285 250
286 try testing.expect(try caser.canonCaselessMatch(allocator, "ascii only!", "ASCII Only!")); 251 try testing.expect(try canonCaselessMatch(allocator, normalize, "ascii only!", "ASCII Only!"));
287 252
288 const a = "Héllo World! \u{3d3}"; 253 const a = "Héllo World! \u{3d3}";
289 const b = "He\u{301}llo World! \u{3a5}\u{301}"; 254 const b = "He\u{301}llo World! \u{3a5}\u{301}";
290 try testing.expect(!try caser.canonCaselessMatch(allocator, a, b)); 255 try testing.expect(!try canonCaselessMatch(allocator, normalize, a, b));
291 256
292 const c = "He\u{301}llo World! \u{3d2}\u{301}"; 257 const c = "He\u{301}llo World! \u{3d2}\u{301}";
293 try testing.expect(try caser.canonCaselessMatch(allocator, a, c)); 258 try testing.expect(try canonCaselessMatch(allocator, normalize, a, c));
294}
295
296fn testAllocations(allocator: Allocator) !void {
297 // With normalize provided
298 {
299 const normalize = try Normalize.init(allocator);
300 defer normalize.deinit(allocator);
301 const caser = try CaseFolding.initWithNormalize(allocator, normalize);
302 defer caser.deinit(allocator);
303 }
304 // With normalize owned
305 {
306 const caser = try CaseFolding.init(allocator);
307 defer caser.deinit(allocator);
308 }
309}
310
311test "Allocation Failures" {
312 try testing.checkAllAllocationFailures(
313 testing.allocator,
314 testAllocations,
315 .{},
316 );
317} 259}
318 260
319const std = @import("std"); 261const std = @import("std");