1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
|
const std = @import("std");
const mem = std.mem;
const testing = std.testing;
const ascii = @import("ascii");
pub const FoldData = @import("FoldData");
const Normalize = @import("Normalize");
fold_data: *const FoldData,
const Self = @This();
fn caseFold(
self: Self,
allocator: mem.Allocator,
cps: []const u21,
) ![]const u21 {
var cfcps = std.ArrayList(u21).init(allocator);
defer cfcps.deinit();
for (cps) |cp| {
const cf = self.fold_data.caseFold(cp);
if (cf.len == 0) {
try cfcps.append(cp);
} else {
try cfcps.appendSlice(cf);
}
}
return try cfcps.toOwnedSlice();
}
fn changesWhenCaseFolded(self: Self, cps: []const u21) bool {
return for (cps) |cp| {
if (self.fold_data.changesWhenCaseFolded(cp)) break true;
} else false;
}
pub fn compatCaselessMatch(
self: Self,
allocator: mem.Allocator,
normalizer: *const Normalize,
a: []const u8,
b: []const u8,
) !bool {
if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b);
// Process a
const nfd_a = try normalizer.nfxdCodePoints(allocator, a, .nfd);
defer allocator.free(nfd_a);
var need_free_cf_nfd_a = false;
var cf_nfd_a: []const u21 = nfd_a;
if (self.changesWhenCaseFolded(nfd_a)) {
cf_nfd_a = try self.caseFold(allocator, nfd_a);
need_free_cf_nfd_a = true;
}
defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a);
const nfkd_cf_nfd_a = try normalizer.nfkdCodePoints(allocator, cf_nfd_a);
defer allocator.free(nfkd_cf_nfd_a);
const cf_nfkd_cf_nfd_a = try self.caseFold(allocator, nfkd_cf_nfd_a);
defer allocator.free(cf_nfkd_cf_nfd_a);
const nfkd_cf_nfkd_cf_nfd_a = try normalizer.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_a);
defer allocator.free(nfkd_cf_nfkd_cf_nfd_a);
// Process b
const nfd_b = try normalizer.nfxdCodePoints(allocator, b, .nfd);
defer allocator.free(nfd_b);
var need_free_cf_nfd_b = false;
var cf_nfd_b: []const u21 = nfd_b;
if (self.changesWhenCaseFolded(nfd_b)) {
cf_nfd_b = try self.caseFold(allocator, nfd_b);
need_free_cf_nfd_b = true;
}
defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b);
const nfkd_cf_nfd_b = try normalizer.nfkdCodePoints(allocator, cf_nfd_b);
defer allocator.free(nfkd_cf_nfd_b);
const cf_nfkd_cf_nfd_b = try self.caseFold(allocator, nfkd_cf_nfd_b);
defer allocator.free(cf_nfkd_cf_nfd_b);
const nfkd_cf_nfkd_cf_nfd_b = try normalizer.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_b);
defer allocator.free(nfkd_cf_nfkd_cf_nfd_b);
return mem.eql(u21, nfkd_cf_nfkd_cf_nfd_a, nfkd_cf_nfkd_cf_nfd_b);
}
test "compatCaselessMatch" {
const allocator = testing.allocator;
var norm_data = try Normalize.NormData.init(allocator);
defer norm_data.deinit();
const n = Normalize{ .norm_data = &norm_data };
var fold_data = try FoldData.init(allocator);
defer fold_data.deinit();
const caser = Self{ .fold_data = &fold_data };
try testing.expect(try caser.compatCaselessMatch(allocator, &n, "ascii only!", "ASCII Only!"));
const a = "Héllo World! \u{3d3}";
const b = "He\u{301}llo World! \u{3a5}\u{301}";
try testing.expect(try caser.compatCaselessMatch(allocator, &n, a, b));
const c = "He\u{301}llo World! \u{3d2}\u{301}";
try testing.expect(try caser.compatCaselessMatch(allocator, &n, a, c));
}
pub fn canonCaselessMatch(
self: Self,
allocator: mem.Allocator,
normalizer: *const Normalize,
a: []const u8,
b: []const u8,
) !bool {
if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b);
// Process a
const nfd_a = try normalizer.nfxdCodePoints(allocator, a, .nfd);
defer allocator.free(nfd_a);
var need_free_cf_nfd_a = false;
var cf_nfd_a: []const u21 = nfd_a;
if (self.changesWhenCaseFolded(nfd_a)) {
cf_nfd_a = try self.caseFold(allocator, nfd_a);
need_free_cf_nfd_a = true;
}
defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a);
var need_free_nfd_cf_nfd_a = false;
var nfd_cf_nfd_a = cf_nfd_a;
if (!need_free_cf_nfd_a) {
nfd_cf_nfd_a = try normalizer.nfdCodePoints(allocator, cf_nfd_a);
need_free_nfd_cf_nfd_a = true;
}
defer if (need_free_nfd_cf_nfd_a) allocator.free(nfd_cf_nfd_a);
// Process b
const nfd_b = try normalizer.nfxdCodePoints(allocator, b, .nfd);
defer allocator.free(nfd_b);
var need_free_cf_nfd_b = false;
var cf_nfd_b: []const u21 = nfd_b;
if (self.changesWhenCaseFolded(nfd_b)) {
cf_nfd_b = try self.caseFold(allocator, nfd_b);
need_free_cf_nfd_b = true;
}
defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b);
var need_free_nfd_cf_nfd_b = false;
var nfd_cf_nfd_b = cf_nfd_b;
if (!need_free_cf_nfd_b) {
nfd_cf_nfd_b = try normalizer.nfdCodePoints(allocator, cf_nfd_b);
need_free_nfd_cf_nfd_b = true;
}
defer if (need_free_nfd_cf_nfd_b) allocator.free(nfd_cf_nfd_b);
return mem.eql(u21, nfd_cf_nfd_a, nfd_cf_nfd_b);
}
test "canonCaselessMatch" {
const allocator = testing.allocator;
var norm_data = try Normalize.NormData.init(allocator);
defer norm_data.deinit();
const n = Normalize{ .norm_data = &norm_data };
var fold_data = try FoldData.init(allocator);
defer fold_data.deinit();
const caser = Self{ .fold_data = &fold_data };
try testing.expect(try caser.canonCaselessMatch(allocator, &n, "ascii only!", "ASCII Only!"));
const a = "Héllo World! \u{3d3}";
const b = "He\u{301}llo World! \u{3a5}\u{301}";
try testing.expect(!try caser.canonCaselessMatch(allocator, &n, a, b));
const c = "He\u{301}llo World! \u{3d2}\u{301}";
try testing.expect(try caser.canonCaselessMatch(allocator, &n, a, c));
}
|