summaryrefslogtreecommitdiff
path: root/src/CaseFolding.zig
diff options
context:
space:
mode:
Diffstat (limited to 'src/CaseFolding.zig')
-rw-r--r--src/CaseFolding.zig40
1 files changed, 16 insertions, 24 deletions
diff --git a/src/CaseFolding.zig b/src/CaseFolding.zig
index 88f047c..d69cddc 100644
--- a/src/CaseFolding.zig
+++ b/src/CaseFolding.zig
@@ -100,14 +100,13 @@ fn isCwcfException(cp: u21) bool {
100/// comprehensive comparison possible, but slower than `canonCaselessMatch`. 100/// comprehensive comparison possible, but slower than `canonCaselessMatch`.
101pub fn compatCaselessMatch( 101pub fn compatCaselessMatch(
102 allocator: Allocator, 102 allocator: Allocator,
103 normalize: Normalize,
104 a: []const u8, 103 a: []const u8,
105 b: []const u8, 104 b: []const u8,
106) Allocator.Error!bool { 105) Allocator.Error!bool {
107 if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); 106 if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b);
108 107
109 // Process a 108 // Process a
110 const nfd_a = try normalize.nfxdCodePoints(allocator, a, .nfd); 109 const nfd_a = try Normalize.nfxdCodePoints(allocator, a, .nfd);
111 defer allocator.free(nfd_a); 110 defer allocator.free(nfd_a);
112 111
113 var need_free_cf_nfd_a = false; 112 var need_free_cf_nfd_a = false;
@@ -118,15 +117,15 @@ pub fn compatCaselessMatch(
118 } 117 }
119 defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a); 118 defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a);
120 119
121 const nfkd_cf_nfd_a = try normalize.nfkdCodePoints(allocator, cf_nfd_a); 120 const nfkd_cf_nfd_a = try Normalize.nfkdCodePoints(allocator, cf_nfd_a);
122 defer allocator.free(nfkd_cf_nfd_a); 121 defer allocator.free(nfkd_cf_nfd_a);
123 const cf_nfkd_cf_nfd_a = try CaseFolding.caseFoldAlloc(allocator, nfkd_cf_nfd_a); 122 const cf_nfkd_cf_nfd_a = try CaseFolding.caseFoldAlloc(allocator, nfkd_cf_nfd_a);
124 defer allocator.free(cf_nfkd_cf_nfd_a); 123 defer allocator.free(cf_nfkd_cf_nfd_a);
125 const nfkd_cf_nfkd_cf_nfd_a = try normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_a); 124 const nfkd_cf_nfkd_cf_nfd_a = try Normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_a);
126 defer allocator.free(nfkd_cf_nfkd_cf_nfd_a); 125 defer allocator.free(nfkd_cf_nfkd_cf_nfd_a);
127 126
128 // Process b 127 // Process b
129 const nfd_b = try normalize.nfxdCodePoints(allocator, b, .nfd); 128 const nfd_b = try Normalize.nfxdCodePoints(allocator, b, .nfd);
130 defer allocator.free(nfd_b); 129 defer allocator.free(nfd_b);
131 130
132 var need_free_cf_nfd_b = false; 131 var need_free_cf_nfd_b = false;
@@ -137,11 +136,11 @@ pub fn compatCaselessMatch(
137 } 136 }
138 defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b); 137 defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b);
139 138
140 const nfkd_cf_nfd_b = try normalize.nfkdCodePoints(allocator, cf_nfd_b); 139 const nfkd_cf_nfd_b = try Normalize.nfkdCodePoints(allocator, cf_nfd_b);
141 defer allocator.free(nfkd_cf_nfd_b); 140 defer allocator.free(nfkd_cf_nfd_b);
142 const cf_nfkd_cf_nfd_b = try CaseFolding.caseFoldAlloc(allocator, nfkd_cf_nfd_b); 141 const cf_nfkd_cf_nfd_b = try CaseFolding.caseFoldAlloc(allocator, nfkd_cf_nfd_b);
143 defer allocator.free(cf_nfkd_cf_nfd_b); 142 defer allocator.free(cf_nfkd_cf_nfd_b);
144 const nfkd_cf_nfkd_cf_nfd_b = try normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_b); 143 const nfkd_cf_nfkd_cf_nfd_b = try Normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_b);
145 defer allocator.free(nfkd_cf_nfkd_cf_nfd_b); 144 defer allocator.free(nfkd_cf_nfkd_cf_nfd_b);
146 145
147 return mem.eql(u21, nfkd_cf_nfkd_cf_nfd_a, nfkd_cf_nfkd_cf_nfd_b); 146 return mem.eql(u21, nfkd_cf_nfkd_cf_nfd_a, nfkd_cf_nfkd_cf_nfd_b);
@@ -176,31 +175,27 @@ test "caseFold" {
176test "compatCaselessMatch" { 175test "compatCaselessMatch" {
177 const allocator = testing.allocator; 176 const allocator = testing.allocator;
178 177
179 var normalize = try Normalize.init(allocator); 178 try testing.expect(try compatCaselessMatch(allocator, "ascii only!", "ASCII Only!"));
180 defer normalize.deinit(allocator);
181
182 try testing.expect(try compatCaselessMatch(allocator, normalize, "ascii only!", "ASCII Only!"));
183 179
184 const a = "Héllo World! \u{3d3}"; 180 const a = "Héllo World! \u{3d3}";
185 const b = "He\u{301}llo World! \u{3a5}\u{301}"; 181 const b = "He\u{301}llo World! \u{3a5}\u{301}";
186 try testing.expect(try compatCaselessMatch(allocator, normalize, a, b)); 182 try testing.expect(try compatCaselessMatch(allocator, a, b));
187 183
188 const c = "He\u{301}llo World! \u{3d2}\u{301}"; 184 const c = "He\u{301}llo World! \u{3d2}\u{301}";
189 try testing.expect(try compatCaselessMatch(allocator, normalize, a, c)); 185 try testing.expect(try compatCaselessMatch(allocator, a, c));
190} 186}
191 187
192/// Performs canonical caseless string matching by decomposing to NFD. This is 188/// Performs canonical caseless string matching by decomposing to NFD. This is
193/// faster than `compatCaselessMatch`, but less comprehensive. 189/// faster than `compatCaselessMatch`, but less comprehensive.
194pub fn canonCaselessMatch( 190pub fn canonCaselessMatch(
195 allocator: Allocator, 191 allocator: Allocator,
196 normalize: Normalize,
197 a: []const u8, 192 a: []const u8,
198 b: []const u8, 193 b: []const u8,
199) Allocator.Error!bool { 194) Allocator.Error!bool {
200 if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); 195 if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b);
201 196
202 // Process a 197 // Process a
203 const nfd_a = try normalize.nfxdCodePoints(allocator, a, .nfd); 198 const nfd_a = try Normalize.nfxdCodePoints(allocator, a, .nfd);
204 defer allocator.free(nfd_a); 199 defer allocator.free(nfd_a);
205 200
206 var need_free_cf_nfd_a = false; 201 var need_free_cf_nfd_a = false;
@@ -214,13 +209,13 @@ pub fn canonCaselessMatch(
214 var need_free_nfd_cf_nfd_a = false; 209 var need_free_nfd_cf_nfd_a = false;
215 var nfd_cf_nfd_a = cf_nfd_a; 210 var nfd_cf_nfd_a = cf_nfd_a;
216 if (!need_free_cf_nfd_a) { 211 if (!need_free_cf_nfd_a) {
217 nfd_cf_nfd_a = try normalize.nfdCodePoints(allocator, cf_nfd_a); 212 nfd_cf_nfd_a = try Normalize.nfdCodePoints(allocator, cf_nfd_a);
218 need_free_nfd_cf_nfd_a = true; 213 need_free_nfd_cf_nfd_a = true;
219 } 214 }
220 defer if (need_free_nfd_cf_nfd_a) allocator.free(nfd_cf_nfd_a); 215 defer if (need_free_nfd_cf_nfd_a) allocator.free(nfd_cf_nfd_a);
221 216
222 // Process b 217 // Process b
223 const nfd_b = try normalize.nfxdCodePoints(allocator, b, .nfd); 218 const nfd_b = try Normalize.nfxdCodePoints(allocator, b, .nfd);
224 defer allocator.free(nfd_b); 219 defer allocator.free(nfd_b);
225 220
226 var need_free_cf_nfd_b = false; 221 var need_free_cf_nfd_b = false;
@@ -234,7 +229,7 @@ pub fn canonCaselessMatch(
234 var need_free_nfd_cf_nfd_b = false; 229 var need_free_nfd_cf_nfd_b = false;
235 var nfd_cf_nfd_b = cf_nfd_b; 230 var nfd_cf_nfd_b = cf_nfd_b;
236 if (!need_free_cf_nfd_b) { 231 if (!need_free_cf_nfd_b) {
237 nfd_cf_nfd_b = try normalize.nfdCodePoints(allocator, cf_nfd_b); 232 nfd_cf_nfd_b = try Normalize.nfdCodePoints(allocator, cf_nfd_b);
238 need_free_nfd_cf_nfd_b = true; 233 need_free_nfd_cf_nfd_b = true;
239 } 234 }
240 defer if (need_free_nfd_cf_nfd_b) allocator.free(nfd_cf_nfd_b); 235 defer if (need_free_nfd_cf_nfd_b) allocator.free(nfd_cf_nfd_b);
@@ -245,17 +240,14 @@ pub fn canonCaselessMatch(
245test "canonCaselessMatch" { 240test "canonCaselessMatch" {
246 const allocator = testing.allocator; 241 const allocator = testing.allocator;
247 242
248 var normalize = try Normalize.init(allocator); 243 try testing.expect(try canonCaselessMatch(allocator, "ascii only!", "ASCII Only!"));
249 defer normalize.deinit(allocator);
250
251 try testing.expect(try canonCaselessMatch(allocator, normalize, "ascii only!", "ASCII Only!"));
252 244
253 const a = "Héllo World! \u{3d3}"; 245 const a = "Héllo World! \u{3d3}";
254 const b = "He\u{301}llo World! \u{3a5}\u{301}"; 246 const b = "He\u{301}llo World! \u{3a5}\u{301}";
255 try testing.expect(!try canonCaselessMatch(allocator, normalize, a, b)); 247 try testing.expect(!try canonCaselessMatch(allocator, a, b));
256 248
257 const c = "He\u{301}llo World! \u{3d2}\u{301}"; 249 const c = "He\u{301}llo World! \u{3d2}\u{301}";
258 try testing.expect(try canonCaselessMatch(allocator, normalize, a, c)); 250 try testing.expect(try canonCaselessMatch(allocator, a, c));
259} 251}
260 252
261const std = @import("std"); 253const std = @import("std");