summaryrefslogtreecommitdiff
path: root/src/Caser.zig
diff options
context:
space:
mode:
authorGravatar Jose Colon Rodriguez2024-03-01 20:31:52 -0400
committerGravatar Jose Colon Rodriguez2024-03-01 20:31:52 -0400
commit1ecfd06469ed4c2503034796faf4e7dca4196238 (patch)
treefc95244332b24780306228e12cb22ffd27979d5f /src/Caser.zig
parentChanges when case folded check; 20ms faster (diff)
downloadzg-1ecfd06469ed4c2503034796faf4e7dca4196238.tar.gz
zg-1ecfd06469ed4c2503034796faf4e7dca4196238.tar.xz
zg-1ecfd06469ed4c2503034796faf4e7dca4196238.zip
Moved case fold stuff to src/Caser.zig
Diffstat (limited to 'src/Caser.zig')
-rw-r--r--src/Caser.zig109
1 files changed, 109 insertions, 0 deletions
diff --git a/src/Caser.zig b/src/Caser.zig
new file mode 100644
index 0000000..d02370a
--- /dev/null
+++ b/src/Caser.zig
@@ -0,0 +1,109 @@
1const std = @import("std");
2const mem = std.mem;
3const testing = std.testing;
4
5const ascii = @import("ascii");
6pub const FoldData = @import("FoldData");
7const Normalizer = @import("Normalizer");
8
9fold_data: *const FoldData,
10
11const Self = @This();
12
/// Builds a new code point sequence in which every element of `cps` is
/// replaced by the mapping returned from `fold_data.caseFold`. When that
/// lookup yields an empty mapping, the code point is copied through as is.
///
/// The result is allocated with `allocator`; the caller owns it and must
/// free it. Allocation failures are propagated to the caller.
fn caseFold(
    self: Self,
    allocator: mem.Allocator,
    cps: []const u21,
) ![]const u21 {
    var folded = std.ArrayList(u21).init(allocator);
    // After a successful `toOwnedSlice` the list is empty, so this only
    // releases memory when we leave early with an error.
    defer folded.deinit();

    for (cps) |cp| {
        const mapping = self.fold_data.caseFold(cp);

        if (mapping.len != 0) {
            try folded.appendSlice(mapping);
            continue;
        }

        // Empty mapping: keep the original code point.
        try folded.append(cp);
    }

    const owned = try folded.toOwnedSlice();
    return owned;
}
33
/// Returns true as soon as `fold_data.changesWhenCaseFolded` reports true for
/// any code point in `cps`; returns false when none does (including when
/// `cps` is empty).
fn changesWhenCaseFolded(self: Self, cps: []const u21) bool {
    for (cps) |cp| {
        if (self.fold_data.changesWhenCaseFolded(cp)) return true;
    }
    return false;
}
39
/// Reports whether `a` and `b` are equal after both have been run through
/// the same normalize-and-case-fold pipeline:
///
///     NFKD(caseFold(NFKD(caseFold(NFD(x)))))
///
/// When both inputs are ASCII-only the pipeline is skipped entirely and the
/// comparison is delegated to `std.ascii.eqlIgnoreCase`.
///
/// `allocator` is used only for temporary buffers; everything allocated here
/// is freed before returning. Errors from the normalizer or from allocation
/// are propagated.
pub fn compatCaselessMatch(
    self: Self,
    allocator: mem.Allocator,
    normalizer: *const Normalizer,
    a: []const u8,
    b: []const u8,
) !bool {
    if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b);

    const key_a = try self.compatCaseFoldKey(allocator, normalizer, a);
    defer allocator.free(key_a);

    const key_b = try self.compatCaseFoldKey(allocator, normalizer, b);
    defer allocator.free(key_b);

    return mem.eql(u21, key_a, key_b);
}

/// Computes NFKD(caseFold(NFKD(caseFold(NFD(str))))) as a code point slice.
/// All intermediate buffers are released before returning; the caller owns
/// the returned slice and must free it with `allocator`.
fn compatCaseFoldKey(
    self: Self,
    allocator: mem.Allocator,
    normalizer: *const Normalizer,
    str: []const u8,
) ![]const u21 {
    const nfd = try normalizer.nfxdCodePoints(allocator, str, .nfd);
    defer allocator.free(nfd);

    // Skip the first fold (and its allocation) when no code point would
    // change; in that case `cf_nfd` simply aliases `nfd` and must not be
    // freed a second time.
    const needs_fold = self.changesWhenCaseFolded(nfd);
    const cf_nfd: []const u21 = if (needs_fold) try self.caseFold(allocator, nfd) else nfd;
    defer if (needs_fold) allocator.free(cf_nfd);

    const nfkd_cf_nfd = try normalizer.nfkdCodePoints(allocator, cf_nfd);
    defer allocator.free(nfkd_cf_nfd);

    const cf_nfkd_cf_nfd = try self.caseFold(allocator, nfkd_cf_nfd);
    defer allocator.free(cf_nfkd_cf_nfd);

    return try normalizer.nfkdCodePoints(allocator, cf_nfkd_cf_nfd);
}
89
// Exercises the ASCII fast path first, then the full pipeline with a
// precomposed string compared against two decomposed spellings.
test "compatCaselessMatch" {
    const allocator = testing.allocator;

    var norm_data = try Normalizer.NormData.init(allocator);
    defer norm_data.deinit();
    const normalizer = Normalizer{ .norm_data = &norm_data };

    var fold_data = try FoldData.init(allocator);
    defer fold_data.deinit();
    const caser = Self{ .fold_data = &fold_data };

    // Both inputs are ASCII-only.
    const ascii_matched = try caser.compatCaselessMatch(allocator, &normalizer, "ascii only!", "ASCII Only!");
    try testing.expect(ascii_matched);

    // Non-ASCII inputs: each variant is expected to match the reference.
    const reference = "Héllo World! \u{3d3}";
    const variants = [_][]const u8{
        "He\u{301}llo World! \u{3a5}\u{301}",
        "He\u{301}llo World! \u{3d2}\u{301}",
    };
    for (variants) |variant| {
        const matched = try caser.compatCaselessMatch(allocator, &normalizer, reference, variant);
        try testing.expect(matched);
    }
}