diff options
Diffstat (limited to 'src/CaseFold.zig')
| -rw-r--r-- | src/CaseFold.zig | 256 |
1 files changed, 195 insertions, 61 deletions
diff --git a/src/CaseFold.zig b/src/CaseFold.zig index 6490aea..162e82f 100644 --- a/src/CaseFold.zig +++ b/src/CaseFold.zig | |||
| @@ -1,28 +1,124 @@ | |||
| 1 | const std = @import("std"); | 1 | cutoff: u21 = undefined, |
| 2 | const mem = std.mem; | 2 | cwcf_exceptions_min: u21 = undefined, |
| 3 | const testing = std.testing; | 3 | cwcf_exceptions_max: u21 = undefined, |
| 4 | cwcf_exceptions: []u21 = undefined, | ||
| 5 | multiple_start: u21 = undefined, | ||
| 6 | stage1: []u8 = undefined, | ||
| 7 | stage2: []u8 = undefined, | ||
| 8 | stage3: []i24 = undefined, | ||
| 9 | normalize: Normalize, | ||
| 10 | owns_normalize: bool, | ||
| 11 | |||
| 12 | const CaseFolding = @This(); | ||
| 13 | |||
| 14 | pub fn init(allocator: Allocator) !CaseFolding { | ||
| 15 | var case_fold: CaseFolding = undefined; | ||
| 16 | try case_fold.setup(allocator); | ||
| 17 | return case_fold; | ||
| 18 | } | ||
| 4 | 19 | ||
| 5 | const ascii = @import("ascii"); | 20 | pub fn initWithNormalize(allocator: Allocator, norm: Normalize) !CaseFolding { |
| 6 | pub const FoldData = @import("FoldData"); | 21 | var casefold: CaseFolding = undefined; |
| 7 | const Normalize = @import("Normalize"); | 22 | try casefold.setupWithNormalize(allocator, norm); |
| 23 | return casefold; | ||
| 24 | } | ||
| 25 | |||
| 26 | pub fn setup(casefold: *CaseFolding, allocator: Allocator) !void { | ||
| 27 | try casefold.setupImpl(allocator); | ||
| 28 | casefold.owns_normalize = false; | ||
| 29 | errdefer casefold.deinit(allocator); | ||
| 30 | try casefold.normalize.setup(allocator); | ||
| 31 | casefold.owns_normalize = true; | ||
| 32 | } | ||
| 33 | |||
| 34 | pub fn setupWithNormalize(casefold: *CaseFolding, allocator: Allocator, norm: Normalize) !void { | ||
| 35 | try casefold.setupImpl(allocator); | ||
| 36 | casefold.normalize = norm; | ||
| 37 | casefold.owns_normalize = false; | ||
| 38 | } | ||
| 8 | 39 | ||
| 9 | fold_data: *const FoldData, | 40 | fn setupImpl(casefold: *CaseFolding, allocator: Allocator) !void { |
| 41 | const decompressor = compress.flate.inflate.decompressor; | ||
| 42 | const in_bytes = @embedFile("fold"); | ||
| 43 | var in_fbs = std.io.fixedBufferStream(in_bytes); | ||
| 44 | var in_decomp = decompressor(.raw, in_fbs.reader()); | ||
| 45 | var reader = in_decomp.reader(); | ||
| 46 | |||
| 47 | const endian = builtin.cpu.arch.endian(); | ||
| 48 | |||
| 49 | casefold.cutoff = @intCast(try reader.readInt(u24, endian)); | ||
| 50 | casefold.multiple_start = @intCast(try reader.readInt(u24, endian)); | ||
| 51 | |||
| 52 | var len = try reader.readInt(u16, endian); | ||
| 53 | casefold.stage1 = try allocator.alloc(u8, len); | ||
| 54 | errdefer allocator.free(casefold.stage1); | ||
| 55 | for (0..len) |i| casefold.stage1[i] = try reader.readInt(u8, endian); | ||
| 56 | |||
| 57 | len = try reader.readInt(u16, endian); | ||
| 58 | casefold.stage2 = try allocator.alloc(u8, len); | ||
| 59 | errdefer allocator.free(casefold.stage2); | ||
| 60 | for (0..len) |i| casefold.stage2[i] = try reader.readInt(u8, endian); | ||
| 61 | |||
| 62 | len = try reader.readInt(u16, endian); | ||
| 63 | casefold.stage3 = try allocator.alloc(i24, len); | ||
| 64 | errdefer allocator.free(casefold.stage3); | ||
| 65 | for (0..len) |i| casefold.stage3[i] = try reader.readInt(i24, endian); | ||
| 66 | |||
| 67 | casefold.cwcf_exceptions_min = @intCast(try reader.readInt(u24, endian)); | ||
| 68 | casefold.cwcf_exceptions_max = @intCast(try reader.readInt(u24, endian)); | ||
| 69 | len = try reader.readInt(u16, endian); | ||
| 70 | casefold.cwcf_exceptions = try allocator.alloc(u21, len); | ||
| 71 | errdefer allocator.free(casefold.cwcf_exceptions); | ||
| 72 | for (0..len) |i| casefold.cwcf_exceptions[i] = @intCast(try reader.readInt(u24, endian)); | ||
| 73 | } | ||
| 74 | |||
| 75 | pub fn deinit(fdata: *const CaseFolding, allocator: mem.Allocator) void { | ||
| 76 | allocator.free(fdata.stage1); | ||
| 77 | allocator.free(fdata.stage2); | ||
| 78 | allocator.free(fdata.stage3); | ||
| 79 | allocator.free(fdata.cwcf_exceptions); | ||
| 80 | if (fdata.owns_normalize) fdata.normalize.deinit(allocator); | ||
| 81 | } | ||
| 82 | |||
| 83 | /// Returns the case fold for `cp`. | ||
| 84 | pub fn caseFold(fdata: *const CaseFolding, cp: u21, buf: []u21) []const u21 { | ||
| 85 | if (cp >= fdata.cutoff) return &.{}; | ||
| 86 | |||
| 87 | const stage1_val = fdata.stage1[cp >> 8]; | ||
| 88 | if (stage1_val == 0) return &.{}; | ||
| 89 | |||
| 90 | const stage2_index = @as(usize, stage1_val) * 256 + (cp & 0xFF); | ||
| 91 | const stage3_index = fdata.stage2[stage2_index]; | ||
| 10 | 92 | ||
| 11 | const Self = @This(); | 93 | if (stage3_index & 0x80 != 0) { |
| 94 | const real_index = @as(usize, fdata.multiple_start) + (stage3_index ^ 0x80) * 3; | ||
| 95 | const mapping = mem.sliceTo(fdata.stage3[real_index..][0..3], 0); | ||
| 96 | for (mapping, 0..) |c, i| buf[i] = @intCast(c); | ||
| 97 | |||
| 98 | return buf[0..mapping.len]; | ||
| 99 | } | ||
| 100 | |||
| 101 | const offset = fdata.stage3[stage3_index]; | ||
| 102 | if (offset == 0) return &.{}; | ||
| 103 | |||
| 104 | buf[0] = @intCast(@as(i32, cp) + offset); | ||
| 105 | |||
| 106 | return buf[0..1]; | ||
| 107 | } | ||
| 12 | 108 | ||
| 13 | /// Produces the case folded code points for `cps`. Caller must free returned | 109 | /// Produces the case folded code points for `cps`. Caller must free returned |
| 14 | /// slice with `allocator`. | 110 | /// slice with `allocator`. |
| 15 | pub fn caseFold( | 111 | pub fn caseFoldAlloc( |
| 16 | self: Self, | 112 | casefold: *const CaseFolding, |
| 17 | allocator: mem.Allocator, | 113 | allocator: Allocator, |
| 18 | cps: []const u21, | 114 | cps: []const u21, |
| 19 | ) ![]const u21 { | 115 | ) Allocator.Error![]const u21 { |
| 20 | var cfcps = std.ArrayList(u21).init(allocator); | 116 | var cfcps = std.ArrayList(u21).init(allocator); |
| 21 | defer cfcps.deinit(); | 117 | defer cfcps.deinit(); |
| 22 | var buf: [3]u21 = undefined; | 118 | var buf: [3]u21 = undefined; |
| 23 | 119 | ||
| 24 | for (cps) |cp| { | 120 | for (cps) |cp| { |
| 25 | const cf = self.fold_data.caseFold(cp, &buf); | 121 | const cf = casefold.caseFold(cp, &buf); |
| 26 | 122 | ||
| 27 | if (cf.len == 0) { | 123 | if (cf.len == 0) { |
| 28 | try cfcps.append(cp); | 124 | try cfcps.append(cp); |
| @@ -34,59 +130,71 @@ pub fn caseFold( | |||
| 34 | return try cfcps.toOwnedSlice(); | 130 | return try cfcps.toOwnedSlice(); |
| 35 | } | 131 | } |
| 36 | 132 | ||
| 37 | fn changesWhenCaseFolded(self: Self, cps: []const u21) bool { | 133 | /// Returns true when caseFold(NFD(`cp`)) != NFD(`cp`). |
| 134 | pub fn cpChangesWhenCaseFolded(casefold: *const CaseFolding, cp: u21) bool { | ||
| 135 | var buf: [3]u21 = undefined; | ||
| 136 | const has_mapping = casefold.caseFold(cp, &buf).len != 0; | ||
| 137 | return has_mapping and !casefold.isCwcfException(cp); | ||
| 138 | } | ||
| 139 | |||
| 140 | pub fn changesWhenCaseFolded(casefold: *const CaseFolding, cps: []const u21) bool { | ||
| 38 | return for (cps) |cp| { | 141 | return for (cps) |cp| { |
| 39 | if (self.fold_data.changesWhenCaseFolded(cp)) break true; | 142 | if (casefold.cpChangesWhenCaseFolded(cp)) break true; |
| 40 | } else false; | 143 | } else false; |
| 41 | } | 144 | } |
| 42 | 145 | ||
| 146 | fn isCwcfException(casefold: *const CaseFolding, cp: u21) bool { | ||
| 147 | return cp >= casefold.cwcf_exceptions_min and | ||
| 148 | cp <= casefold.cwcf_exceptions_max and | ||
| 149 | std.mem.indexOfScalar(u21, casefold.cwcf_exceptions, cp) != null; | ||
| 150 | } | ||
| 151 | |||
| 43 | /// Caseless compare `a` and `b` by decomposing to NFKD. This is the most | 152 | /// Caseless compare `a` and `b` by decomposing to NFKD. This is the most |
| 44 | /// comprehensive comparison possible, but slower than `canonCaselessMatch`. | 153 | /// comprehensive comparison possible, but slower than `canonCaselessMatch`. |
| 45 | pub fn compatCaselessMatch( | 154 | pub fn compatCaselessMatch( |
| 46 | self: Self, | 155 | casefold: *const CaseFolding, |
| 47 | allocator: mem.Allocator, | 156 | allocator: Allocator, |
| 48 | normalizer: *const Normalize, | ||
| 49 | a: []const u8, | 157 | a: []const u8, |
| 50 | b: []const u8, | 158 | b: []const u8, |
| 51 | ) !bool { | 159 | ) Allocator.Error!bool { |
| 52 | if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); | 160 | if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); |
| 53 | 161 | ||
| 54 | // Process a | 162 | // Process a |
| 55 | const nfd_a = try normalizer.nfxdCodePoints(allocator, a, .nfd); | 163 | const nfd_a = try casefold.normalize.nfxdCodePoints(allocator, a, .nfd); |
| 56 | defer allocator.free(nfd_a); | 164 | defer allocator.free(nfd_a); |
| 57 | 165 | ||
| 58 | var need_free_cf_nfd_a = false; | 166 | var need_free_cf_nfd_a = false; |
| 59 | var cf_nfd_a: []const u21 = nfd_a; | 167 | var cf_nfd_a: []const u21 = nfd_a; |
| 60 | if (self.changesWhenCaseFolded(nfd_a)) { | 168 | if (casefold.changesWhenCaseFolded(nfd_a)) { |
| 61 | cf_nfd_a = try self.caseFold(allocator, nfd_a); | 169 | cf_nfd_a = try casefold.caseFoldAlloc(allocator, nfd_a); |
| 62 | need_free_cf_nfd_a = true; | 170 | need_free_cf_nfd_a = true; |
| 63 | } | 171 | } |
| 64 | defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a); | 172 | defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a); |
| 65 | 173 | ||
| 66 | const nfkd_cf_nfd_a = try normalizer.nfkdCodePoints(allocator, cf_nfd_a); | 174 | const nfkd_cf_nfd_a = try casefold.normalize.nfkdCodePoints(allocator, cf_nfd_a); |
| 67 | defer allocator.free(nfkd_cf_nfd_a); | 175 | defer allocator.free(nfkd_cf_nfd_a); |
| 68 | const cf_nfkd_cf_nfd_a = try self.caseFold(allocator, nfkd_cf_nfd_a); | 176 | const cf_nfkd_cf_nfd_a = try casefold.caseFoldAlloc(allocator, nfkd_cf_nfd_a); |
| 69 | defer allocator.free(cf_nfkd_cf_nfd_a); | 177 | defer allocator.free(cf_nfkd_cf_nfd_a); |
| 70 | const nfkd_cf_nfkd_cf_nfd_a = try normalizer.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_a); | 178 | const nfkd_cf_nfkd_cf_nfd_a = try casefold.normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_a); |
| 71 | defer allocator.free(nfkd_cf_nfkd_cf_nfd_a); | 179 | defer allocator.free(nfkd_cf_nfkd_cf_nfd_a); |
| 72 | 180 | ||
| 73 | // Process b | 181 | // Process b |
| 74 | const nfd_b = try normalizer.nfxdCodePoints(allocator, b, .nfd); | 182 | const nfd_b = try casefold.normalize.nfxdCodePoints(allocator, b, .nfd); |
| 75 | defer allocator.free(nfd_b); | 183 | defer allocator.free(nfd_b); |
| 76 | 184 | ||
| 77 | var need_free_cf_nfd_b = false; | 185 | var need_free_cf_nfd_b = false; |
| 78 | var cf_nfd_b: []const u21 = nfd_b; | 186 | var cf_nfd_b: []const u21 = nfd_b; |
| 79 | if (self.changesWhenCaseFolded(nfd_b)) { | 187 | if (casefold.changesWhenCaseFolded(nfd_b)) { |
| 80 | cf_nfd_b = try self.caseFold(allocator, nfd_b); | 188 | cf_nfd_b = try casefold.caseFoldAlloc(allocator, nfd_b); |
| 81 | need_free_cf_nfd_b = true; | 189 | need_free_cf_nfd_b = true; |
| 82 | } | 190 | } |
| 83 | defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b); | 191 | defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b); |
| 84 | 192 | ||
| 85 | const nfkd_cf_nfd_b = try normalizer.nfkdCodePoints(allocator, cf_nfd_b); | 193 | const nfkd_cf_nfd_b = try casefold.normalize.nfkdCodePoints(allocator, cf_nfd_b); |
| 86 | defer allocator.free(nfkd_cf_nfd_b); | 194 | defer allocator.free(nfkd_cf_nfd_b); |
| 87 | const cf_nfkd_cf_nfd_b = try self.caseFold(allocator, nfkd_cf_nfd_b); | 195 | const cf_nfkd_cf_nfd_b = try casefold.caseFoldAlloc(allocator, nfkd_cf_nfd_b); |
| 88 | defer allocator.free(cf_nfkd_cf_nfd_b); | 196 | defer allocator.free(cf_nfkd_cf_nfd_b); |
| 89 | const nfkd_cf_nfkd_cf_nfd_b = try normalizer.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_b); | 197 | const nfkd_cf_nfkd_cf_nfd_b = try casefold.normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_b); |
| 90 | defer allocator.free(nfkd_cf_nfkd_cf_nfd_b); | 198 | defer allocator.free(nfkd_cf_nfkd_cf_nfd_b); |
| 91 | 199 | ||
| 92 | return mem.eql(u21, nfkd_cf_nfkd_cf_nfd_a, nfkd_cf_nfkd_cf_nfd_b); | 200 | return mem.eql(u21, nfkd_cf_nfkd_cf_nfd_a, nfkd_cf_nfkd_cf_nfd_b); |
| @@ -95,42 +203,37 @@ pub fn compatCaselessMatch( | |||
| 95 | test "compatCaselessMatch" { | 203 | test "compatCaselessMatch" { |
| 96 | const allocator = testing.allocator; | 204 | const allocator = testing.allocator; |
| 97 | 205 | ||
| 98 | const n = try Normalize.init(allocator); | 206 | const caser = try CaseFolding.init(allocator); |
| 99 | defer n.deinit(allocator); | 207 | defer caser.deinit(allocator); |
| 100 | 208 | ||
| 101 | const fold_data = try FoldData.init(allocator); | 209 | try testing.expect(try caser.compatCaselessMatch(allocator, "ascii only!", "ASCII Only!")); |
| 102 | defer fold_data.deinit(allocator); | ||
| 103 | const caser = Self{ .fold_data = &fold_data }; | ||
| 104 | |||
| 105 | try testing.expect(try caser.compatCaselessMatch(allocator, &n, "ascii only!", "ASCII Only!")); | ||
| 106 | 210 | ||
| 107 | const a = "Héllo World! \u{3d3}"; | 211 | const a = "Héllo World! \u{3d3}"; |
| 108 | const b = "He\u{301}llo World! \u{3a5}\u{301}"; | 212 | const b = "He\u{301}llo World! \u{3a5}\u{301}"; |
| 109 | try testing.expect(try caser.compatCaselessMatch(allocator, &n, a, b)); | 213 | try testing.expect(try caser.compatCaselessMatch(allocator, a, b)); |
| 110 | 214 | ||
| 111 | const c = "He\u{301}llo World! \u{3d2}\u{301}"; | 215 | const c = "He\u{301}llo World! \u{3d2}\u{301}"; |
| 112 | try testing.expect(try caser.compatCaselessMatch(allocator, &n, a, c)); | 216 | try testing.expect(try caser.compatCaselessMatch(allocator, a, c)); |
| 113 | } | 217 | } |
| 114 | 218 | ||
| 115 | /// Performs canonical caseless string matching by decomposing to NFD. This is | 219 | /// Performs canonical caseless string matching by decomposing to NFD. This is |
| 116 | /// faster than `compatCaselessMatch`, but less comprehensive. | 220 | /// faster than `compatCaselessMatch`, but less comprehensive. |
| 117 | pub fn canonCaselessMatch( | 221 | pub fn canonCaselessMatch( |
| 118 | self: Self, | 222 | casefold: *const CaseFolding, |
| 119 | allocator: mem.Allocator, | 223 | allocator: Allocator, |
| 120 | normalizer: *const Normalize, | ||
| 121 | a: []const u8, | 224 | a: []const u8, |
| 122 | b: []const u8, | 225 | b: []const u8, |
| 123 | ) !bool { | 226 | ) Allocator.Error!bool { |
| 124 | if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); | 227 | if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); |
| 125 | 228 | ||
| 126 | // Process a | 229 | // Process a |
| 127 | const nfd_a = try normalizer.nfxdCodePoints(allocator, a, .nfd); | 230 | const nfd_a = try casefold.normalize.nfxdCodePoints(allocator, a, .nfd); |
| 128 | defer allocator.free(nfd_a); | 231 | defer allocator.free(nfd_a); |
| 129 | 232 | ||
| 130 | var need_free_cf_nfd_a = false; | 233 | var need_free_cf_nfd_a = false; |
| 131 | var cf_nfd_a: []const u21 = nfd_a; | 234 | var cf_nfd_a: []const u21 = nfd_a; |
| 132 | if (self.changesWhenCaseFolded(nfd_a)) { | 235 | if (casefold.changesWhenCaseFolded(nfd_a)) { |
| 133 | cf_nfd_a = try self.caseFold(allocator, nfd_a); | 236 | cf_nfd_a = try casefold.caseFoldAlloc(allocator, nfd_a); |
| 134 | need_free_cf_nfd_a = true; | 237 | need_free_cf_nfd_a = true; |
| 135 | } | 238 | } |
| 136 | defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a); | 239 | defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a); |
| @@ -138,19 +241,19 @@ pub fn canonCaselessMatch( | |||
| 138 | var need_free_nfd_cf_nfd_a = false; | 241 | var need_free_nfd_cf_nfd_a = false; |
| 139 | var nfd_cf_nfd_a = cf_nfd_a; | 242 | var nfd_cf_nfd_a = cf_nfd_a; |
| 140 | if (!need_free_cf_nfd_a) { | 243 | if (!need_free_cf_nfd_a) { |
| 141 | nfd_cf_nfd_a = try normalizer.nfdCodePoints(allocator, cf_nfd_a); | 244 | nfd_cf_nfd_a = try casefold.normalize.nfdCodePoints(allocator, cf_nfd_a); |
| 142 | need_free_nfd_cf_nfd_a = true; | 245 | need_free_nfd_cf_nfd_a = true; |
| 143 | } | 246 | } |
| 144 | defer if (need_free_nfd_cf_nfd_a) allocator.free(nfd_cf_nfd_a); | 247 | defer if (need_free_nfd_cf_nfd_a) allocator.free(nfd_cf_nfd_a); |
| 145 | 248 | ||
| 146 | // Process b | 249 | // Process b |
| 147 | const nfd_b = try normalizer.nfxdCodePoints(allocator, b, .nfd); | 250 | const nfd_b = try casefold.normalize.nfxdCodePoints(allocator, b, .nfd); |
| 148 | defer allocator.free(nfd_b); | 251 | defer allocator.free(nfd_b); |
| 149 | 252 | ||
| 150 | var need_free_cf_nfd_b = false; | 253 | var need_free_cf_nfd_b = false; |
| 151 | var cf_nfd_b: []const u21 = nfd_b; | 254 | var cf_nfd_b: []const u21 = nfd_b; |
| 152 | if (self.changesWhenCaseFolded(nfd_b)) { | 255 | if (casefold.changesWhenCaseFolded(nfd_b)) { |
| 153 | cf_nfd_b = try self.caseFold(allocator, nfd_b); | 256 | cf_nfd_b = try casefold.caseFoldAlloc(allocator, nfd_b); |
| 154 | need_free_cf_nfd_b = true; | 257 | need_free_cf_nfd_b = true; |
| 155 | } | 258 | } |
| 156 | defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b); | 259 | defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b); |
| @@ -158,7 +261,7 @@ pub fn canonCaselessMatch( | |||
| 158 | var need_free_nfd_cf_nfd_b = false; | 261 | var need_free_nfd_cf_nfd_b = false; |
| 159 | var nfd_cf_nfd_b = cf_nfd_b; | 262 | var nfd_cf_nfd_b = cf_nfd_b; |
| 160 | if (!need_free_cf_nfd_b) { | 263 | if (!need_free_cf_nfd_b) { |
| 161 | nfd_cf_nfd_b = try normalizer.nfdCodePoints(allocator, cf_nfd_b); | 264 | nfd_cf_nfd_b = try casefold.normalize.nfdCodePoints(allocator, cf_nfd_b); |
| 162 | need_free_nfd_cf_nfd_b = true; | 265 | need_free_nfd_cf_nfd_b = true; |
| 163 | } | 266 | } |
| 164 | defer if (need_free_nfd_cf_nfd_b) allocator.free(nfd_cf_nfd_b); | 267 | defer if (need_free_nfd_cf_nfd_b) allocator.free(nfd_cf_nfd_b); |
| @@ -169,19 +272,50 @@ pub fn canonCaselessMatch( | |||
| 169 | test "canonCaselessMatch" { | 272 | test "canonCaselessMatch" { |
| 170 | const allocator = testing.allocator; | 273 | const allocator = testing.allocator; |
| 171 | 274 | ||
| 172 | const n = try Normalize.init(allocator); | 275 | const caser = try CaseFolding.init(allocator); |
| 173 | defer n.deinit(allocator); | 276 | defer caser.deinit(allocator); |
| 174 | |||
| 175 | const fold_data = try FoldData.init(allocator); | ||
| 176 | defer fold_data.deinit(allocator); | ||
| 177 | const caser = Self{ .fold_data = &fold_data }; | ||
| 178 | 277 | ||
| 179 | try testing.expect(try caser.canonCaselessMatch(allocator, &n, "ascii only!", "ASCII Only!")); | 278 | try testing.expect(try caser.canonCaselessMatch(allocator, "ascii only!", "ASCII Only!")); |
| 180 | 279 | ||
| 181 | const a = "Héllo World! \u{3d3}"; | 280 | const a = "Héllo World! \u{3d3}"; |
| 182 | const b = "He\u{301}llo World! \u{3a5}\u{301}"; | 281 | const b = "He\u{301}llo World! \u{3a5}\u{301}"; |
| 183 | try testing.expect(!try caser.canonCaselessMatch(allocator, &n, a, b)); | 282 | try testing.expect(!try caser.canonCaselessMatch(allocator, a, b)); |
| 184 | 283 | ||
| 185 | const c = "He\u{301}llo World! \u{3d2}\u{301}"; | 284 | const c = "He\u{301}llo World! \u{3d2}\u{301}"; |
| 186 | try testing.expect(try caser.canonCaselessMatch(allocator, &n, a, c)); | 285 | try testing.expect(try caser.canonCaselessMatch(allocator, a, c)); |
| 187 | } | 286 | } |
| 287 | |||
| 288 | fn testAllocations(allocator: Allocator) !void { | ||
| 289 | // With normalize provided | ||
| 290 | { | ||
| 291 | const normalize = try Normalize.init(allocator); | ||
| 292 | defer normalize.deinit(allocator); | ||
| 293 | const caser1 = try CaseFolding.initWithNormalize(allocator, normalize); | ||
| 294 | defer caser1.deinit(allocator); | ||
| 295 | } | ||
| 296 | // With normalize owned | ||
| 297 | { | ||
| 298 | const caser2 = try CaseFolding.init(allocator); | ||
| 299 | defer caser2.deinit(allocator); | ||
| 300 | } | ||
| 301 | } | ||
| 302 | |||
| 303 | // test "Allocation Failures" { | ||
| 304 | // if (true) return error.SkipZigTest; // XXX: remove | ||
| 305 | // try testing.checkAllAllocationFailures( | ||
| 306 | // testing.allocator, | ||
| 307 | // testAllocations, | ||
| 308 | // .{}, | ||
| 309 | // ); | ||
| 310 | // } | ||
| 311 | |||
| 312 | const std = @import("std"); | ||
| 313 | const builtin = @import("builtin"); | ||
| 314 | const mem = std.mem; | ||
| 315 | const testing = std.testing; | ||
| 316 | const Allocator = mem.Allocator; | ||
| 317 | |||
| 318 | const ascii = @import("ascii"); | ||
| 319 | const Normalize = @import("Normalize"); | ||
| 320 | |||
| 321 | const compress = std.compress; | ||