summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jose Colon Rodriguez, 2024-03-01 20:49:49 -0400
committer: Jose Colon Rodriguez, 2024-03-01 20:49:49 -0400
commit: 68b01d794dcf145fb11603a238c647b7ca998f84 (patch)
tree: 5758ec9f6622105d4df7e2990c0e4708530d44ff
parent: Moved case fold stuff to src/Caser.zig (diff)
download: zg-68b01d794dcf145fb11603a238c647b7ca998f84.tar.gz
download: zg-68b01d794dcf145fb11603a238c647b7ca998f84.tar.xz
download: zg-68b01d794dcf145fb11603a238c647b7ca998f84.zip
Added canonical caseless match to Caser
-rw-r--r-- src/Caser.zig 85
-rw-r--r-- src/Normalizer.zig 25
-rw-r--r-- src/main.zig 2
3 files changed, 105 insertions, 7 deletions
diff --git a/src/Caser.zig b/src/Caser.zig
index d02370a..43a3a5b 100644
--- a/src/Caser.zig
+++ b/src/Caser.zig
@@ -50,13 +50,13 @@ pub fn compatCaselessMatch(
50 const nfd_a = try normalizer.nfxdCodePoints(allocator, a, .nfd); 50 const nfd_a = try normalizer.nfxdCodePoints(allocator, a, .nfd);
51 defer allocator.free(nfd_a); 51 defer allocator.free(nfd_a);
52 52
53 var need_frr_cf_nfd_a = false; 53 var need_free_cf_nfd_a = false;
54 var cf_nfd_a: []const u21 = nfd_a; 54 var cf_nfd_a: []const u21 = nfd_a;
55 if (self.changesWhenCaseFolded(nfd_a)) { 55 if (self.changesWhenCaseFolded(nfd_a)) {
56 cf_nfd_a = try self.caseFold(allocator, nfd_a); 56 cf_nfd_a = try self.caseFold(allocator, nfd_a);
57 need_frr_cf_nfd_a = true; 57 need_free_cf_nfd_a = true;
58 } 58 }
59 defer if (need_frr_cf_nfd_a) allocator.free(cf_nfd_a); 59 defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a);
60 60
61 const nfkd_cf_nfd_a = try normalizer.nfkdCodePoints(allocator, cf_nfd_a); 61 const nfkd_cf_nfd_a = try normalizer.nfkdCodePoints(allocator, cf_nfd_a);
62 defer allocator.free(nfkd_cf_nfd_a); 62 defer allocator.free(nfkd_cf_nfd_a);
@@ -69,13 +69,13 @@ pub fn compatCaselessMatch(
69 const nfd_b = try normalizer.nfxdCodePoints(allocator, b, .nfd); 69 const nfd_b = try normalizer.nfxdCodePoints(allocator, b, .nfd);
70 defer allocator.free(nfd_b); 70 defer allocator.free(nfd_b);
71 71
72 var need_frr_cf_nfd_b = false; 72 var need_free_cf_nfd_b = false;
73 var cf_nfd_b: []const u21 = nfd_b; 73 var cf_nfd_b: []const u21 = nfd_b;
74 if (self.changesWhenCaseFolded(nfd_b)) { 74 if (self.changesWhenCaseFolded(nfd_b)) {
75 cf_nfd_b = try self.caseFold(allocator, nfd_b); 75 cf_nfd_b = try self.caseFold(allocator, nfd_b);
76 need_frr_cf_nfd_b = true; 76 need_free_cf_nfd_b = true;
77 } 77 }
78 defer if (need_frr_cf_nfd_b) allocator.free(cf_nfd_b); 78 defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b);
79 79
80 const nfkd_cf_nfd_b = try normalizer.nfkdCodePoints(allocator, cf_nfd_b); 80 const nfkd_cf_nfd_b = try normalizer.nfkdCodePoints(allocator, cf_nfd_b);
81 defer allocator.free(nfkd_cf_nfd_b); 81 defer allocator.free(nfkd_cf_nfd_b);
@@ -107,3 +107,76 @@ test "compatCaselessMatch" {
107 const c = "He\u{301}llo World! \u{3d2}\u{301}"; 107 const c = "He\u{301}llo World! \u{3d2}\u{301}";
108 try testing.expect(try caser.compatCaselessMatch(allocator, &n, a, c)); 108 try testing.expect(try caser.compatCaselessMatch(allocator, &n, a, c));
109} 109}
110
/// Reports whether `a` and `b` are a canonical caseless match, i.e. whether
/// NFD(toCasefold(NFD(a))) equals NFD(toCasefold(NFD(b))) code point for
/// code point.
///
/// When both inputs are ASCII-only the comparison is delegated to
/// `std.ascii.eqlIgnoreCase` and nothing is allocated. Otherwise every
/// intermediate code point slice is allocated with `allocator` and released
/// before returning; the caller owns nothing afterwards.
///
/// Errors from the normalizer and from `caseFold` are propagated unchanged.
pub fn canonCaselessMatch(
    self: Self,
    allocator: mem.Allocator,
    normalizer: *const Normalizer,
    a: []const u8,
    b: []const u8,
) !bool {
    if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b);

    // Process a
    const nfd_a = try normalizer.nfxdCodePoints(allocator, a, .nfd);
    defer allocator.free(nfd_a);

    // Only fold (and allocate) when folding would actually change something;
    // otherwise `cf_nfd_a` simply aliases `nfd_a`.
    var need_free_cf_nfd_a = false;
    var cf_nfd_a: []const u21 = nfd_a;
    if (self.changesWhenCaseFolded(nfd_a)) {
        cf_nfd_a = try self.caseFold(allocator, nfd_a);
        need_free_cf_nfd_a = true;
    }
    defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a);

    // The folded sequence must be brought back to NFD only when folding
    // rewrote it. If nothing was folded, `cf_nfd_a` is still the NFD output
    // from above and can be compared as is.
    const nfd_cf_nfd_a: []const u21 = if (need_free_cf_nfd_a)
        try normalizer.nfdCodePoints(allocator, cf_nfd_a)
    else
        cf_nfd_a;
    defer if (need_free_cf_nfd_a) allocator.free(nfd_cf_nfd_a);

    // Process b
    const nfd_b = try normalizer.nfxdCodePoints(allocator, b, .nfd);
    defer allocator.free(nfd_b);

    var need_free_cf_nfd_b = false;
    var cf_nfd_b: []const u21 = nfd_b;
    if (self.changesWhenCaseFolded(nfd_b)) {
        cf_nfd_b = try self.caseFold(allocator, nfd_b);
        need_free_cf_nfd_b = true;
    }
    defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b);

    // Same rule as for `a`: re-normalize exactly when folding changed `b`.
    const nfd_cf_nfd_b: []const u21 = if (need_free_cf_nfd_b)
        try normalizer.nfdCodePoints(allocator, cf_nfd_b)
    else
        cf_nfd_b;
    defer if (need_free_cf_nfd_b) allocator.free(nfd_cf_nfd_b);

    return mem.eql(u21, nfd_cf_nfd_a, nfd_cf_nfd_b);
}
162
// Exercises `canonCaselessMatch` on the ASCII fast path and on non-ASCII
// inputs that differ in composition and in the final Greek letter.
test "canonCaselessMatch" {
    const allocator = testing.allocator;

    var norm_data = try Normalizer.NormData.init(allocator);
    defer norm_data.deinit();
    const n = Normalizer{ .norm_data = &norm_data };

    var fold_data = try FoldData.init(allocator);
    defer fold_data.deinit();
    const caser = Self{ .fold_data = &fold_data };

    // ASCII-only inputs that differ only in letter case.
    const ascii_match = try caser.canonCaselessMatch(allocator, &n, "ascii only!", "ASCII Only!");
    try testing.expect(ascii_match);

    const precomposed = "Héllo World! \u{3d3}";

    // Ends in U+03A5 U+0301 instead of U+03D2 U+0301: expected not to match.
    const with_upsilon = "He\u{301}llo World! \u{3a5}\u{301}";
    const upsilon_match = try caser.canonCaselessMatch(allocator, &n, precomposed, with_upsilon);
    try testing.expect(!upsilon_match);

    // Same text as `precomposed` but written with combining marks: expected to match.
    const with_hook_symbol = "He\u{301}llo World! \u{3d2}\u{301}";
    const hook_symbol_match = try caser.canonCaselessMatch(allocator, &n, precomposed, with_hook_symbol);
    try testing.expect(hook_symbol_match);
}
diff --git a/src/Normalizer.zig b/src/Normalizer.zig
index 3ff157c..b5a54d1 100644
--- a/src/Normalizer.zig
+++ b/src/Normalizer.zig
@@ -343,6 +343,31 @@ test "nfkd !ASCII / alloc" {
343 try testing.expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", result.slice); 343 try testing.expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", result.slice);
344} 344}
345 345
/// Decomposes each code point of `cps` with the `.nfd` form, applies
/// `canonicalSort` to the combined result, and returns it as a newly
/// allocated slice.
///
/// Caller owns the returned slice and must free it with `allocator`.
/// Allocation failures are propagated.
pub fn nfdCodePoints(
    self: Self,
    allocator: mem.Allocator,
    cps: []const u21,
) ![]u21 {
    var decomposed = std.ArrayList(u21).init(allocator);
    // On success `toOwnedSlice` hands the buffer to the caller, so the list
    // only needs releasing when we bail out with an error.
    errdefer decomposed.deinit();

    // Scratch space handed to `decompose` for each code point.
    var scratch: [18]u21 = undefined;

    for (cps) |cp| {
        const result = self.decompose(cp, .nfd, &scratch);

        switch (result.form) {
            // No decomposition for this code point: keep it as is.
            .same => try decomposed.append(cp),
            else => try decomposed.appendSlice(result.cps),
        }
    }

    self.canonicalSort(decomposed.items);

    return try decomposed.toOwnedSlice();
}
370
346pub fn nfkdCodePoints( 371pub fn nfkdCodePoints(
347 self: Self, 372 self: Self,
348 allocator: mem.Allocator, 373 allocator: mem.Allocator,
diff --git a/src/main.zig b/src/main.zig
index 46e7c9d..f9ce28e 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -71,7 +71,7 @@ pub fn main() !void {
71 // result += 1; 71 // result += 1;
72 // } 72 // }
73 while (iter.next()) |line| { 73 while (iter.next()) |line| {
74 if (try caser.compatCaselessMatch(allocator, &norm, prev_line, line)) { 74 if (try caser.canonCaselessMatch(allocator, &norm, prev_line, line)) {
75 result += line.len; 75 result += line.len;
76 } 76 }
77 prev_line = line; 77 prev_line = line;