diff options
| author | 2026-02-05 07:07:40 -0500 | |
|---|---|---|
| committer | 2026-02-05 07:07:40 -0500 | |
| commit | 95f9487f6a7bde2d7266399bdf6843b97cc1b301 (patch) | |
| tree | 122cd20fa574861e807844974b49eb2f91285d3c | |
| parent | Teasing out canonicalization (diff) | |
| download | zg-95f9487f6a7bde2d7266399bdf6843b97cc1b301.tar.gz zg-95f9487f6a7bde2d7266399bdf6843b97cc1b301.tar.xz zg-95f9487f6a7bde2d7266399bdf6843b97cc1b301.zip | |
Base units do not allocate
CanonData included. I may still sort out caseless matching without
allocation, but that's a stretch goal.
Closes #86
Closes #85
| -rw-r--r-- | src/CanonData.zig | 57 | ||||
| -rw-r--r-- | src/CaseFolding.zig | 40 | ||||
| -rw-r--r-- | src/Normalize.zig | 143 | ||||
| -rw-r--r-- | src/unicode_tests.zig | 11 |
4 files changed, 85 insertions, 166 deletions
diff --git a/src/CanonData.zig b/src/CanonData.zig index c972534..5c1ffa6 100644 --- a/src/CanonData.zig +++ b/src/CanonData.zig | |||
| @@ -1,13 +1,23 @@ | |||
| 1 | //! Canonicalization Data | 1 | //! Canonicalization Data |
| 2 | 2 | ||
| 3 | s1: []const u16 = undefined, | 3 | const Data = struct { |
| 4 | s2: []const @import("canon").Canonicalization = undefined, | 4 | s1: []const u16 = undefined, |
| 5 | nfc: std.AutoHashMapUnmanaged([2]u21, u21), | 5 | s2: []const @import("canon").Canonicalization = undefined, |
| 6 | }; | ||
| 7 | |||
| 8 | const canon_data = canon_data: { | ||
| 9 | const canon_ = @import("canon"); | ||
| 10 | break :canon_data Data{ | ||
| 11 | .s1 = &canon_.s1, | ||
| 12 | .s2 = &canon_.s2, | ||
| 13 | }; | ||
| 14 | }; | ||
| 6 | 15 | ||
| 7 | const CanonData = @This(); | 16 | const CanonData = @This(); |
| 8 | 17 | ||
| 9 | // There's a bug here, which is down to how static u21 vs. runtime are handled, | 18 | // There's a bug here, which is down to how static u21 vs. runtime are handled, |
| 10 | // the "unique representation" claim is not working out. So we do this: | 19 | // the "unique representation" claim is not working out. AutoHash casts to bytes, |
| 20 | // and that won't fly. So we do this: | ||
| 11 | 21 | ||
| 12 | const Context = struct { | 22 | const Context = struct { |
| 13 | pub fn hash(_: Context, cps: [2]u21) u64 { | 23 | pub fn hash(_: Context, cps: [2]u21) u64 { |
| @@ -22,47 +32,14 @@ const Context = struct { | |||
| 22 | 32 | ||
| 23 | const c_map = comptime_map.ComptimeHashMap([2]u21, u21, Context, @import("canon").c_map); | 33 | const c_map = comptime_map.ComptimeHashMap([2]u21, u21, Context, @import("canon").c_map); |
| 24 | 34 | ||
| 25 | pub fn init(allocator: mem.Allocator) !CanonData { | ||
| 26 | var cdata = CanonData{ | ||
| 27 | .nfc = .empty, | ||
| 28 | }; | ||
| 29 | errdefer cdata.deinit(allocator); | ||
| 30 | |||
| 31 | const data = @import("canon"); | ||
| 32 | cdata.s1 = &data.s1; | ||
| 33 | cdata.s2 = &data.s2; | ||
| 34 | var count: usize = 0; | ||
| 35 | for (data.composite) |cp| { | ||
| 36 | count += 1; | ||
| 37 | const cps = cdata.toNfd(cp); | ||
| 38 | std.debug.assert(cps.len == 2); | ||
| 39 | try cdata.nfc.put(allocator, cps[0..2].*, cp); | ||
| 40 | } | ||
| 41 | |||
| 42 | // var keys = cdata.nfc.keyIterator(); | ||
| 43 | // while (keys.next()) |key| { | ||
| 44 | // const c32: [2]u32 = .{ key[0], key[1] }; | ||
| 45 | // if (c_map.get(c32)) |_| { | ||
| 46 | // std.debug.print("got", .{}); | ||
| 47 | // } | ||
| 48 | // } | ||
| 49 | |||
| 50 | return cdata; | ||
| 51 | } | ||
| 52 | |||
| 53 | pub fn deinit(cdata: *CanonData, allocator: mem.Allocator) void { | ||
| 54 | cdata.nfc.deinit(allocator); | ||
| 55 | } | ||
| 56 | |||
| 57 | /// Returns canonical decomposition for `cp`. | 35 | /// Returns canonical decomposition for `cp`. |
| 58 | pub fn toNfd(cdata: *const CanonData, cp: u21) []const u21 { | 36 | pub fn toNfd(cp: u21) []const u21 { |
| 59 | const canon = &cdata.s2[cdata.s1[cp >> 8] + (cp & 0xff)]; | 37 | const canon = &canon_data.s2[canon_data.s1[cp >> 8] + (cp & 0xff)]; |
| 60 | return canon.cps[0..canon.len]; | 38 | return canon.cps[0..canon.len]; |
| 61 | } | 39 | } |
| 62 | 40 | ||
| 63 | // Returns the primary composite for the codepoints in `cp`. | 41 | // Returns the primary composite for the codepoints in `cp`. |
| 64 | pub fn toNfc(cdata: *const CanonData, cps: [2]u21) ?u21 { | 42 | pub fn toNfc(cps: [2]u21) ?u21 { |
| 65 | _ = cdata; | ||
| 66 | if (c_map.get(cps)) |cpp| { | 43 | if (c_map.get(cps)) |cpp| { |
| 67 | return cpp.*; | 44 | return cpp.*; |
| 68 | } else { | 45 | } else { |
diff --git a/src/CaseFolding.zig b/src/CaseFolding.zig index 88f047c..d69cddc 100644 --- a/src/CaseFolding.zig +++ b/src/CaseFolding.zig | |||
| @@ -100,14 +100,13 @@ fn isCwcfException(cp: u21) bool { | |||
| 100 | /// comprehensive comparison possible, but slower than `canonCaselessMatch`. | 100 | /// comprehensive comparison possible, but slower than `canonCaselessMatch`. |
| 101 | pub fn compatCaselessMatch( | 101 | pub fn compatCaselessMatch( |
| 102 | allocator: Allocator, | 102 | allocator: Allocator, |
| 103 | normalize: Normalize, | ||
| 104 | a: []const u8, | 103 | a: []const u8, |
| 105 | b: []const u8, | 104 | b: []const u8, |
| 106 | ) Allocator.Error!bool { | 105 | ) Allocator.Error!bool { |
| 107 | if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); | 106 | if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); |
| 108 | 107 | ||
| 109 | // Process a | 108 | // Process a |
| 110 | const nfd_a = try normalize.nfxdCodePoints(allocator, a, .nfd); | 109 | const nfd_a = try Normalize.nfxdCodePoints(allocator, a, .nfd); |
| 111 | defer allocator.free(nfd_a); | 110 | defer allocator.free(nfd_a); |
| 112 | 111 | ||
| 113 | var need_free_cf_nfd_a = false; | 112 | var need_free_cf_nfd_a = false; |
| @@ -118,15 +117,15 @@ pub fn compatCaselessMatch( | |||
| 118 | } | 117 | } |
| 119 | defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a); | 118 | defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a); |
| 120 | 119 | ||
| 121 | const nfkd_cf_nfd_a = try normalize.nfkdCodePoints(allocator, cf_nfd_a); | 120 | const nfkd_cf_nfd_a = try Normalize.nfkdCodePoints(allocator, cf_nfd_a); |
| 122 | defer allocator.free(nfkd_cf_nfd_a); | 121 | defer allocator.free(nfkd_cf_nfd_a); |
| 123 | const cf_nfkd_cf_nfd_a = try CaseFolding.caseFoldAlloc(allocator, nfkd_cf_nfd_a); | 122 | const cf_nfkd_cf_nfd_a = try CaseFolding.caseFoldAlloc(allocator, nfkd_cf_nfd_a); |
| 124 | defer allocator.free(cf_nfkd_cf_nfd_a); | 123 | defer allocator.free(cf_nfkd_cf_nfd_a); |
| 125 | const nfkd_cf_nfkd_cf_nfd_a = try normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_a); | 124 | const nfkd_cf_nfkd_cf_nfd_a = try Normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_a); |
| 126 | defer allocator.free(nfkd_cf_nfkd_cf_nfd_a); | 125 | defer allocator.free(nfkd_cf_nfkd_cf_nfd_a); |
| 127 | 126 | ||
| 128 | // Process b | 127 | // Process b |
| 129 | const nfd_b = try normalize.nfxdCodePoints(allocator, b, .nfd); | 128 | const nfd_b = try Normalize.nfxdCodePoints(allocator, b, .nfd); |
| 130 | defer allocator.free(nfd_b); | 129 | defer allocator.free(nfd_b); |
| 131 | 130 | ||
| 132 | var need_free_cf_nfd_b = false; | 131 | var need_free_cf_nfd_b = false; |
| @@ -137,11 +136,11 @@ pub fn compatCaselessMatch( | |||
| 137 | } | 136 | } |
| 138 | defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b); | 137 | defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b); |
| 139 | 138 | ||
| 140 | const nfkd_cf_nfd_b = try normalize.nfkdCodePoints(allocator, cf_nfd_b); | 139 | const nfkd_cf_nfd_b = try Normalize.nfkdCodePoints(allocator, cf_nfd_b); |
| 141 | defer allocator.free(nfkd_cf_nfd_b); | 140 | defer allocator.free(nfkd_cf_nfd_b); |
| 142 | const cf_nfkd_cf_nfd_b = try CaseFolding.caseFoldAlloc(allocator, nfkd_cf_nfd_b); | 141 | const cf_nfkd_cf_nfd_b = try CaseFolding.caseFoldAlloc(allocator, nfkd_cf_nfd_b); |
| 143 | defer allocator.free(cf_nfkd_cf_nfd_b); | 142 | defer allocator.free(cf_nfkd_cf_nfd_b); |
| 144 | const nfkd_cf_nfkd_cf_nfd_b = try normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_b); | 143 | const nfkd_cf_nfkd_cf_nfd_b = try Normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_b); |
| 145 | defer allocator.free(nfkd_cf_nfkd_cf_nfd_b); | 144 | defer allocator.free(nfkd_cf_nfkd_cf_nfd_b); |
| 146 | 145 | ||
| 147 | return mem.eql(u21, nfkd_cf_nfkd_cf_nfd_a, nfkd_cf_nfkd_cf_nfd_b); | 146 | return mem.eql(u21, nfkd_cf_nfkd_cf_nfd_a, nfkd_cf_nfkd_cf_nfd_b); |
| @@ -176,31 +175,27 @@ test "caseFold" { | |||
| 176 | test "compatCaselessMatch" { | 175 | test "compatCaselessMatch" { |
| 177 | const allocator = testing.allocator; | 176 | const allocator = testing.allocator; |
| 178 | 177 | ||
| 179 | var normalize = try Normalize.init(allocator); | 178 | try testing.expect(try compatCaselessMatch(allocator, "ascii only!", "ASCII Only!")); |
| 180 | defer normalize.deinit(allocator); | ||
| 181 | |||
| 182 | try testing.expect(try compatCaselessMatch(allocator, normalize, "ascii only!", "ASCII Only!")); | ||
| 183 | 179 | ||
| 184 | const a = "Héllo World! \u{3d3}"; | 180 | const a = "Héllo World! \u{3d3}"; |
| 185 | const b = "He\u{301}llo World! \u{3a5}\u{301}"; | 181 | const b = "He\u{301}llo World! \u{3a5}\u{301}"; |
| 186 | try testing.expect(try compatCaselessMatch(allocator, normalize, a, b)); | 182 | try testing.expect(try compatCaselessMatch(allocator, a, b)); |
| 187 | 183 | ||
| 188 | const c = "He\u{301}llo World! \u{3d2}\u{301}"; | 184 | const c = "He\u{301}llo World! \u{3d2}\u{301}"; |
| 189 | try testing.expect(try compatCaselessMatch(allocator, normalize, a, c)); | 185 | try testing.expect(try compatCaselessMatch(allocator, a, c)); |
| 190 | } | 186 | } |
| 191 | 187 | ||
| 192 | /// Performs canonical caseless string matching by decomposing to NFD. This is | 188 | /// Performs canonical caseless string matching by decomposing to NFD. This is |
| 193 | /// faster than `compatCaselessMatch`, but less comprehensive. | 189 | /// faster than `compatCaselessMatch`, but less comprehensive. |
| 194 | pub fn canonCaselessMatch( | 190 | pub fn canonCaselessMatch( |
| 195 | allocator: Allocator, | 191 | allocator: Allocator, |
| 196 | normalize: Normalize, | ||
| 197 | a: []const u8, | 192 | a: []const u8, |
| 198 | b: []const u8, | 193 | b: []const u8, |
| 199 | ) Allocator.Error!bool { | 194 | ) Allocator.Error!bool { |
| 200 | if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); | 195 | if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); |
| 201 | 196 | ||
| 202 | // Process a | 197 | // Process a |
| 203 | const nfd_a = try normalize.nfxdCodePoints(allocator, a, .nfd); | 198 | const nfd_a = try Normalize.nfxdCodePoints(allocator, a, .nfd); |
| 204 | defer allocator.free(nfd_a); | 199 | defer allocator.free(nfd_a); |
| 205 | 200 | ||
| 206 | var need_free_cf_nfd_a = false; | 201 | var need_free_cf_nfd_a = false; |
| @@ -214,13 +209,13 @@ pub fn canonCaselessMatch( | |||
| 214 | var need_free_nfd_cf_nfd_a = false; | 209 | var need_free_nfd_cf_nfd_a = false; |
| 215 | var nfd_cf_nfd_a = cf_nfd_a; | 210 | var nfd_cf_nfd_a = cf_nfd_a; |
| 216 | if (!need_free_cf_nfd_a) { | 211 | if (!need_free_cf_nfd_a) { |
| 217 | nfd_cf_nfd_a = try normalize.nfdCodePoints(allocator, cf_nfd_a); | 212 | nfd_cf_nfd_a = try Normalize.nfdCodePoints(allocator, cf_nfd_a); |
| 218 | need_free_nfd_cf_nfd_a = true; | 213 | need_free_nfd_cf_nfd_a = true; |
| 219 | } | 214 | } |
| 220 | defer if (need_free_nfd_cf_nfd_a) allocator.free(nfd_cf_nfd_a); | 215 | defer if (need_free_nfd_cf_nfd_a) allocator.free(nfd_cf_nfd_a); |
| 221 | 216 | ||
| 222 | // Process b | 217 | // Process b |
| 223 | const nfd_b = try normalize.nfxdCodePoints(allocator, b, .nfd); | 218 | const nfd_b = try Normalize.nfxdCodePoints(allocator, b, .nfd); |
| 224 | defer allocator.free(nfd_b); | 219 | defer allocator.free(nfd_b); |
| 225 | 220 | ||
| 226 | var need_free_cf_nfd_b = false; | 221 | var need_free_cf_nfd_b = false; |
| @@ -234,7 +229,7 @@ pub fn canonCaselessMatch( | |||
| 234 | var need_free_nfd_cf_nfd_b = false; | 229 | var need_free_nfd_cf_nfd_b = false; |
| 235 | var nfd_cf_nfd_b = cf_nfd_b; | 230 | var nfd_cf_nfd_b = cf_nfd_b; |
| 236 | if (!need_free_cf_nfd_b) { | 231 | if (!need_free_cf_nfd_b) { |
| 237 | nfd_cf_nfd_b = try normalize.nfdCodePoints(allocator, cf_nfd_b); | 232 | nfd_cf_nfd_b = try Normalize.nfdCodePoints(allocator, cf_nfd_b); |
| 238 | need_free_nfd_cf_nfd_b = true; | 233 | need_free_nfd_cf_nfd_b = true; |
| 239 | } | 234 | } |
| 240 | defer if (need_free_nfd_cf_nfd_b) allocator.free(nfd_cf_nfd_b); | 235 | defer if (need_free_nfd_cf_nfd_b) allocator.free(nfd_cf_nfd_b); |
| @@ -245,17 +240,14 @@ pub fn canonCaselessMatch( | |||
| 245 | test "canonCaselessMatch" { | 240 | test "canonCaselessMatch" { |
| 246 | const allocator = testing.allocator; | 241 | const allocator = testing.allocator; |
| 247 | 242 | ||
| 248 | var normalize = try Normalize.init(allocator); | 243 | try testing.expect(try canonCaselessMatch(allocator, "ascii only!", "ASCII Only!")); |
| 249 | defer normalize.deinit(allocator); | ||
| 250 | |||
| 251 | try testing.expect(try canonCaselessMatch(allocator, normalize, "ascii only!", "ASCII Only!")); | ||
| 252 | 244 | ||
| 253 | const a = "Héllo World! \u{3d3}"; | 245 | const a = "Héllo World! \u{3d3}"; |
| 254 | const b = "He\u{301}llo World! \u{3a5}\u{301}"; | 246 | const b = "He\u{301}llo World! \u{3a5}\u{301}"; |
| 255 | try testing.expect(!try canonCaselessMatch(allocator, normalize, a, b)); | 247 | try testing.expect(!try canonCaselessMatch(allocator, a, b)); |
| 256 | 248 | ||
| 257 | const c = "He\u{301}llo World! \u{3d2}\u{301}"; | 249 | const c = "He\u{301}llo World! \u{3d2}\u{301}"; |
| 258 | try testing.expect(try canonCaselessMatch(allocator, normalize, a, c)); | 250 | try testing.expect(try canonCaselessMatch(allocator, a, c)); |
| 259 | } | 251 | } |
| 260 | 252 | ||
| 261 | const std = @import("std"); | 253 | const std = @import("std"); |
diff --git a/src/Normalize.zig b/src/Normalize.zig index 3191a8c..865318f 100644 --- a/src/Normalize.zig +++ b/src/Normalize.zig | |||
| @@ -2,25 +2,8 @@ | |||
| 2 | //! Unicode Normalization. You can normalize strings into NFC, | 2 | //! Unicode Normalization. You can normalize strings into NFC, |
| 3 | //! NFKC, NFD, and NFKD normalization forms. | 3 | //! NFKC, NFD, and NFKD normalization forms. |
| 4 | 4 | ||
| 5 | canon_data: CanonData = undefined, | ||
| 6 | |||
| 7 | const Normalize = @This(); | 5 | const Normalize = @This(); |
| 8 | 6 | ||
| 9 | pub fn init(allocator: Allocator) !Normalize { | ||
| 10 | var norm: Normalize = undefined; | ||
| 11 | try norm.setup(allocator); | ||
| 12 | return norm; | ||
| 13 | } | ||
| 14 | |||
| 15 | pub fn setup(self: *Normalize, allocator: Allocator) !void { | ||
| 16 | self.canon_data = try CanonData.init(allocator); | ||
| 17 | } | ||
| 18 | |||
| 19 | pub fn deinit(norm: *const Normalize, allocator: Allocator) void { | ||
| 20 | const mut_norm = @constCast(norm); | ||
| 21 | mut_norm.canon_data.deinit(allocator); | ||
| 22 | } | ||
| 23 | |||
| 24 | const SBase: u21 = 0xAC00; | 7 | const SBase: u21 = 0xAC00; |
| 25 | const LBase: u21 = 0x1100; | 8 | const LBase: u21 = 0x1100; |
| 26 | const VBase: u21 = 0x1161; | 9 | const VBase: u21 = 0x1161; |
| @@ -91,12 +74,12 @@ const Decomp = struct { | |||
| 91 | }; | 74 | }; |
| 92 | 75 | ||
| 93 | // `mapping` retrieves the decomposition mapping for a code point as per the UCD. | 76 | // `mapping` retrieves the decomposition mapping for a code point as per the UCD. |
| 94 | fn mapping(self: Normalize, cp: u21, form: Form) Decomp { | 77 | fn mapping(cp: u21, form: Form) Decomp { |
| 95 | var dc = Decomp{}; | 78 | var dc = Decomp{}; |
| 96 | 79 | ||
| 97 | switch (form) { | 80 | switch (form) { |
| 98 | .nfd => { | 81 | .nfd => { |
| 99 | dc.cps = self.canon_data.toNfd(cp); | 82 | dc.cps = CanonData.toNfd(cp); |
| 100 | if (dc.cps.len != 0) dc.form = .nfd; | 83 | if (dc.cps.len != 0) dc.form = .nfd; |
| 101 | }, | 84 | }, |
| 102 | 85 | ||
| @@ -105,7 +88,7 @@ fn mapping(self: Normalize, cp: u21, form: Form) Decomp { | |||
| 105 | if (dc.cps.len != 0) { | 88 | if (dc.cps.len != 0) { |
| 106 | dc.form = .nfkd; | 89 | dc.form = .nfkd; |
| 107 | } else { | 90 | } else { |
| 108 | dc.cps = self.canon_data.toNfd(cp); | 91 | dc.cps = CanonData.toNfd(cp); |
| 109 | if (dc.cps.len != 0) dc.form = .nfkd; | 92 | if (dc.cps.len != 0) dc.form = .nfkd; |
| 110 | } | 93 | } |
| 111 | }, | 94 | }, |
| @@ -117,12 +100,7 @@ fn mapping(self: Normalize, cp: u21, form: Form) Decomp { | |||
| 117 | } | 100 | } |
| 118 | 101 | ||
| 119 | // `decompose` a code point to the specified normalization form, which should be either `.nfd` or `.nfkd`. | 102 | // `decompose` a code point to the specified normalization form, which should be either `.nfd` or `.nfkd`. |
| 120 | fn decompose( | 103 | fn decompose(cp: u21, form: Form, buf: []u21) Decomp { |
| 121 | self: Normalize, | ||
| 122 | cp: u21, | ||
| 123 | form: Form, | ||
| 124 | buf: []u21, | ||
| 125 | ) Decomp { | ||
| 126 | // ASCII | 104 | // ASCII |
| 127 | if (cp < 128) return .{}; | 105 | if (cp < 128) return .{}; |
| 128 | 106 | ||
| @@ -149,7 +127,7 @@ fn decompose( | |||
| 149 | // Look at previous code point in work queue. | 127 | // Look at previous code point in work queue. |
| 150 | work_index -= 1; | 128 | work_index -= 1; |
| 151 | const next = work[work_index]; | 129 | const next = work[work_index]; |
| 152 | const m = self.mapping(next, form); | 130 | const m = Normalize.mapping(next, form); |
| 153 | 131 | ||
| 154 | // No more of decompositions for this code point. | 132 | // No more of decompositions for this code point. |
| 155 | if (m.form == .same) { | 133 | if (m.form == .same) { |
| @@ -175,44 +153,41 @@ fn decompose( | |||
| 175 | } | 153 | } |
| 176 | 154 | ||
| 177 | test "decompose" { | 155 | test "decompose" { |
| 178 | const allocator = testing.allocator; | ||
| 179 | var n = try Normalize.init(allocator); | ||
| 180 | defer n.deinit(allocator); | ||
| 181 | var buf: [18]u21 = undefined; | 156 | var buf: [18]u21 = undefined; |
| 182 | 157 | ||
| 183 | var dc = n.decompose('é', .nfd, &buf); | 158 | var dc = Normalize.decompose('é', .nfd, &buf); |
| 184 | try testing.expect(dc.form == .nfd); | 159 | try testing.expect(dc.form == .nfd); |
| 185 | try testing.expectEqualSlices(u21, &[_]u21{ 'e', '\u{301}' }, dc.cps[0..2]); | 160 | try testing.expectEqualSlices(u21, &[_]u21{ 'e', '\u{301}' }, dc.cps[0..2]); |
| 186 | 161 | ||
| 187 | dc = n.decompose('\u{1e0a}', .nfd, &buf); | 162 | dc = Normalize.decompose('\u{1e0a}', .nfd, &buf); |
| 188 | try testing.expect(dc.form == .nfd); | 163 | try testing.expect(dc.form == .nfd); |
| 189 | try testing.expectEqualSlices(u21, &[_]u21{ 'D', '\u{307}' }, dc.cps[0..2]); | 164 | try testing.expectEqualSlices(u21, &[_]u21{ 'D', '\u{307}' }, dc.cps[0..2]); |
| 190 | 165 | ||
| 191 | dc = n.decompose('\u{1e0a}', .nfkd, &buf); | 166 | dc = Normalize.decompose('\u{1e0a}', .nfkd, &buf); |
| 192 | try testing.expect(dc.form == .nfkd); | 167 | try testing.expect(dc.form == .nfkd); |
| 193 | try testing.expectEqualSlices(u21, &[_]u21{ 'D', '\u{307}' }, dc.cps[0..2]); | 168 | try testing.expectEqualSlices(u21, &[_]u21{ 'D', '\u{307}' }, dc.cps[0..2]); |
| 194 | 169 | ||
| 195 | dc = n.decompose('\u{3189}', .nfd, &buf); | 170 | dc = Normalize.decompose('\u{3189}', .nfd, &buf); |
| 196 | try testing.expect(dc.form == .same); | 171 | try testing.expect(dc.form == .same); |
| 197 | try testing.expect(dc.cps.len == 0); | 172 | try testing.expect(dc.cps.len == 0); |
| 198 | 173 | ||
| 199 | dc = n.decompose('\u{3189}', .nfkd, &buf); | 174 | dc = Normalize.decompose('\u{3189}', .nfkd, &buf); |
| 200 | try testing.expect(dc.form == .nfkd); | 175 | try testing.expect(dc.form == .nfkd); |
| 201 | try testing.expectEqualSlices(u21, &[_]u21{'\u{1188}'}, dc.cps[0..1]); | 176 | try testing.expectEqualSlices(u21, &[_]u21{'\u{1188}'}, dc.cps[0..1]); |
| 202 | 177 | ||
| 203 | dc = n.decompose('\u{ace1}', .nfd, &buf); | 178 | dc = Normalize.decompose('\u{ace1}', .nfd, &buf); |
| 204 | try testing.expect(dc.form == .nfd); | 179 | try testing.expect(dc.form == .nfd); |
| 205 | try testing.expectEqualSlices(u21, &[_]u21{ '\u{1100}', '\u{1169}', '\u{11a8}' }, dc.cps[0..3]); | 180 | try testing.expectEqualSlices(u21, &[_]u21{ '\u{1100}', '\u{1169}', '\u{11a8}' }, dc.cps[0..3]); |
| 206 | 181 | ||
| 207 | dc = n.decompose('\u{ace1}', .nfkd, &buf); | 182 | dc = Normalize.decompose('\u{ace1}', .nfkd, &buf); |
| 208 | try testing.expect(dc.form == .nfd); | 183 | try testing.expect(dc.form == .nfd); |
| 209 | try testing.expectEqualSlices(u21, &[_]u21{ '\u{1100}', '\u{1169}', '\u{11a8}' }, dc.cps[0..3]); | 184 | try testing.expectEqualSlices(u21, &[_]u21{ '\u{1100}', '\u{1169}', '\u{11a8}' }, dc.cps[0..3]); |
| 210 | 185 | ||
| 211 | dc = n.decompose('\u{3d3}', .nfd, &buf); | 186 | dc = Normalize.decompose('\u{3d3}', .nfd, &buf); |
| 212 | try testing.expect(dc.form == .nfd); | 187 | try testing.expect(dc.form == .nfd); |
| 213 | try testing.expectEqualSlices(u21, &[_]u21{ '\u{3d2}', '\u{301}' }, dc.cps[0..2]); | 188 | try testing.expectEqualSlices(u21, &[_]u21{ '\u{3d2}', '\u{301}' }, dc.cps[0..2]); |
| 214 | 189 | ||
| 215 | dc = n.decompose('\u{3d3}', .nfkd, &buf); | 190 | dc = Normalize.decompose('\u{3d3}', .nfkd, &buf); |
| 216 | try testing.expect(dc.form == .nfkd); | 191 | try testing.expect(dc.form == .nfkd); |
| 217 | try testing.expectEqualSlices(u21, &[_]u21{ '\u{3a5}', '\u{301}' }, dc.cps[0..2]); | 192 | try testing.expectEqualSlices(u21, &[_]u21{ '\u{3a5}', '\u{301}' }, dc.cps[0..2]); |
| 218 | } | 193 | } |
| @@ -231,8 +206,8 @@ pub const Result = struct { | |||
| 231 | return .{ .allocated = true, .slice = try allocator.dupe(u8, result.slice) }; | 206 | return .{ .allocated = true, .slice = try allocator.dupe(u8, result.slice) }; |
| 232 | } | 207 | } |
| 233 | 208 | ||
| 234 | pub fn deinit(self: *const Result, allocator: Allocator) void { | 209 | pub fn deinit(result: *const Result, allocator: Allocator) void { |
| 235 | if (self.allocated) allocator.free(self.slice); | 210 | if (result.allocated) allocator.free(result.slice); |
| 236 | } | 211 | } |
| 237 | }; | 212 | }; |
| 238 | 213 | ||
| @@ -252,16 +227,16 @@ fn canonicalSort(cps: []u21) void { | |||
| 252 | } | 227 | } |
| 253 | 228 | ||
| 254 | /// Normalize `str` to NFD. | 229 | /// Normalize `str` to NFD. |
| 255 | pub fn nfd(self: Normalize, allocator: Allocator, str: []const u8) Allocator.Error!Result { | 230 | pub fn nfd(allocator: Allocator, str: []const u8) Allocator.Error!Result { |
| 256 | return self.nfxd(allocator, str, .nfd); | 231 | return Normalize.nfxd(allocator, str, .nfd); |
| 257 | } | 232 | } |
| 258 | 233 | ||
| 259 | /// Normalize `str` to NFKD. | 234 | /// Normalize `str` to NFKD. |
| 260 | pub fn nfkd(self: Normalize, allocator: Allocator, str: []const u8) Allocator.Error!Result { | 235 | pub fn nfkd(allocator: Allocator, str: []const u8) Allocator.Error!Result { |
| 261 | return self.nfxd(allocator, str, .nfkd); | 236 | return Normalize.nfxd(allocator, str, .nfkd); |
| 262 | } | 237 | } |
| 263 | 238 | ||
| 264 | pub fn nfxdCodePoints(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allocator.Error![]u21 { | 239 | pub fn nfxdCodePoints(allocator: Allocator, str: []const u8, form: Form) Allocator.Error![]u21 { |
| 265 | var dcp_list = std.array_list.Managed(u21).init(allocator); | 240 | var dcp_list = std.array_list.Managed(u21).init(allocator); |
| 266 | defer dcp_list.deinit(); | 241 | defer dcp_list.deinit(); |
| 267 | 242 | ||
| @@ -269,7 +244,7 @@ pub fn nfxdCodePoints(self: Normalize, allocator: Allocator, str: []const u8, fo | |||
| 269 | var dc_buf: [18]u21 = undefined; | 244 | var dc_buf: [18]u21 = undefined; |
| 270 | 245 | ||
| 271 | while (cp_iter.next()) |cp| { | 246 | while (cp_iter.next()) |cp| { |
| 272 | const dc = self.decompose(cp.code, form, &dc_buf); | 247 | const dc = Normalize.decompose(cp.code, form, &dc_buf); |
| 273 | if (dc.form == .same) { | 248 | if (dc.form == .same) { |
| 274 | try dcp_list.append(cp.code); | 249 | try dcp_list.append(cp.code); |
| 275 | } else { | 250 | } else { |
| @@ -282,11 +257,11 @@ pub fn nfxdCodePoints(self: Normalize, allocator: Allocator, str: []const u8, fo | |||
| 282 | return try dcp_list.toOwnedSlice(); | 257 | return try dcp_list.toOwnedSlice(); |
| 283 | } | 258 | } |
| 284 | 259 | ||
| 285 | fn nfxd(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allocator.Error!Result { | 260 | fn nfxd(allocator: Allocator, str: []const u8, form: Form) Allocator.Error!Result { |
| 286 | // Quick checks. | 261 | // Quick checks. |
| 287 | if (ascii.isAsciiOnly(str)) return Result{ .slice = str }; | 262 | if (ascii.isAsciiOnly(str)) return Result{ .slice = str }; |
| 288 | 263 | ||
| 289 | const dcps = try self.nfxdCodePoints(allocator, str, form); | 264 | const dcps = try Normalize.nfxdCodePoints(allocator, str, form); |
| 290 | defer allocator.free(dcps); | 265 | defer allocator.free(dcps); |
| 291 | 266 | ||
| 292 | var dstr_list = std.array_list.Managed(u8).init(allocator); | 267 | var dstr_list = std.array_list.Managed(u8).init(allocator); |
| @@ -303,10 +278,8 @@ fn nfxd(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allo | |||
| 303 | 278 | ||
| 304 | test "nfd ASCII / no-alloc" { | 279 | test "nfd ASCII / no-alloc" { |
| 305 | const allocator = testing.allocator; | 280 | const allocator = testing.allocator; |
| 306 | var n = try Normalize.init(allocator); | ||
| 307 | defer n.deinit(allocator); | ||
| 308 | 281 | ||
| 309 | const result = try n.nfd(allocator, "Hello World!"); | 282 | const result = try Normalize.nfd(allocator, "Hello World!"); |
| 310 | defer result.deinit(allocator); | 283 | defer result.deinit(allocator); |
| 311 | 284 | ||
| 312 | try testing.expectEqualStrings("Hello World!", result.slice); | 285 | try testing.expectEqualStrings("Hello World!", result.slice); |
| @@ -314,10 +287,8 @@ test "nfd ASCII / no-alloc" { | |||
| 314 | 287 | ||
| 315 | test "nfd !ASCII / alloc" { | 288 | test "nfd !ASCII / alloc" { |
| 316 | const allocator = testing.allocator; | 289 | const allocator = testing.allocator; |
| 317 | var n = try Normalize.init(allocator); | ||
| 318 | defer n.deinit(allocator); | ||
| 319 | 290 | ||
| 320 | const result = try n.nfd(allocator, "Héllo World! \u{3d3}"); | 291 | const result = try Normalize.nfd(allocator, "Héllo World! \u{3d3}"); |
| 321 | defer result.deinit(allocator); | 292 | defer result.deinit(allocator); |
| 322 | 293 | ||
| 323 | try testing.expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", result.slice); | 294 | try testing.expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", result.slice); |
| @@ -325,10 +296,8 @@ test "nfd !ASCII / alloc" { | |||
| 325 | 296 | ||
| 326 | test "nfkd ASCII / no-alloc" { | 297 | test "nfkd ASCII / no-alloc" { |
| 327 | const allocator = testing.allocator; | 298 | const allocator = testing.allocator; |
| 328 | var n = try Normalize.init(allocator); | ||
| 329 | defer n.deinit(allocator); | ||
| 330 | 299 | ||
| 331 | const result = try n.nfkd(allocator, "Hello World!"); | 300 | const result = try Normalize.nfkd(allocator, "Hello World!"); |
| 332 | defer result.deinit(allocator); | 301 | defer result.deinit(allocator); |
| 333 | 302 | ||
| 334 | try testing.expectEqualStrings("Hello World!", result.slice); | 303 | try testing.expectEqualStrings("Hello World!", result.slice); |
| @@ -336,27 +305,21 @@ test "nfkd ASCII / no-alloc" { | |||
| 336 | 305 | ||
| 337 | test "nfkd !ASCII / alloc" { | 306 | test "nfkd !ASCII / alloc" { |
| 338 | const allocator = testing.allocator; | 307 | const allocator = testing.allocator; |
| 339 | var n = try Normalize.init(allocator); | ||
| 340 | defer n.deinit(allocator); | ||
| 341 | 308 | ||
| 342 | const result = try n.nfkd(allocator, "Héllo World! \u{3d3}"); | 309 | const result = try Normalize.nfkd(allocator, "Héllo World! \u{3d3}"); |
| 343 | defer result.deinit(allocator); | 310 | defer result.deinit(allocator); |
| 344 | 311 | ||
| 345 | try testing.expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", result.slice); | 312 | try testing.expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", result.slice); |
| 346 | } | 313 | } |
| 347 | 314 | ||
| 348 | pub fn nfdCodePoints( | 315 | pub fn nfdCodePoints(allocator: Allocator, cps: []const u21) Allocator.Error![]u21 { |
| 349 | self: Normalize, | ||
| 350 | allocator: Allocator, | ||
| 351 | cps: []const u21, | ||
| 352 | ) Allocator.Error![]u21 { | ||
| 353 | var dcp_list = std.array_list.Managed(u21).init(allocator); | 316 | var dcp_list = std.array_list.Managed(u21).init(allocator); |
| 354 | defer dcp_list.deinit(); | 317 | defer dcp_list.deinit(); |
| 355 | 318 | ||
| 356 | var dc_buf: [18]u21 = undefined; | 319 | var dc_buf: [18]u21 = undefined; |
| 357 | 320 | ||
| 358 | for (cps) |cp| { | 321 | for (cps) |cp| { |
| 359 | const dc = self.decompose(cp, .nfd, &dc_buf); | 322 | const dc = Normalize.decompose(cp, .nfd, &dc_buf); |
| 360 | 323 | ||
| 361 | if (dc.form == .same) { | 324 | if (dc.form == .same) { |
| 362 | try dcp_list.append(cp); | 325 | try dcp_list.append(cp); |
| @@ -370,18 +333,14 @@ pub fn nfdCodePoints( | |||
| 370 | return try dcp_list.toOwnedSlice(); | 333 | return try dcp_list.toOwnedSlice(); |
| 371 | } | 334 | } |
| 372 | 335 | ||
| 373 | pub fn nfkdCodePoints( | 336 | pub fn nfkdCodePoints(allocator: Allocator, cps: []const u21) Allocator.Error![]u21 { |
| 374 | self: Normalize, | ||
| 375 | allocator: Allocator, | ||
| 376 | cps: []const u21, | ||
| 377 | ) Allocator.Error![]u21 { | ||
| 378 | var dcp_list = std.array_list.Managed(u21).init(allocator); | 337 | var dcp_list = std.array_list.Managed(u21).init(allocator); |
| 379 | defer dcp_list.deinit(); | 338 | defer dcp_list.deinit(); |
| 380 | 339 | ||
| 381 | var dc_buf: [18]u21 = undefined; | 340 | var dc_buf: [18]u21 = undefined; |
| 382 | 341 | ||
| 383 | for (cps) |cp| { | 342 | for (cps) |cp| { |
| 384 | const dc = self.decompose(cp, .nfkd, &dc_buf); | 343 | const dc = Normalize.decompose(cp, .nfkd, &dc_buf); |
| 385 | 344 | ||
| 386 | if (dc.form == .same) { | 345 | if (dc.form == .same) { |
| 387 | try dcp_list.append(cp); | 346 | try dcp_list.append(cp); |
| @@ -402,29 +361,29 @@ fn isHangul(cp: u21) bool { | |||
| 402 | } | 361 | } |
| 403 | 362 | ||
| 404 | /// Normalizes `str` to NFC. | 363 | /// Normalizes `str` to NFC. |
| 405 | pub fn nfc(self: Normalize, allocator: Allocator, str: []const u8) Allocator.Error!Result { | 364 | pub fn nfc(allocator: Allocator, str: []const u8) Allocator.Error!Result { |
| 406 | return self.nfxc(allocator, str, .nfc); | 365 | return Normalize.nfxc(allocator, str, .nfc); |
| 407 | } | 366 | } |
| 408 | 367 | ||
| 409 | /// Normalizes `str` to NFKC. | 368 | /// Normalizes `str` to NFKC. |
| 410 | pub fn nfkc(self: Normalize, allocator: Allocator, str: []const u8) Allocator.Error!Result { | 369 | pub fn nfkc(allocator: Allocator, str: []const u8) Allocator.Error!Result { |
| 411 | return self.nfxc(allocator, str, .nfkc); | 370 | return Normalize.nfxc(allocator, str, .nfkc); |
| 412 | } | 371 | } |
| 413 | 372 | ||
| 414 | fn nfxc(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allocator.Error!Result { | 373 | fn nfxc(allocator: Allocator, str: []const u8, form: Form) Allocator.Error!Result { |
| 415 | // Quick checks. | 374 | // Quick checks. |
| 416 | if (ascii.isAsciiOnly(str)) return Result{ .slice = str }; | 375 | if (ascii.isAsciiOnly(str)) return Result{ .slice = str }; |
| 417 | if (form == .nfc and isLatin1Only(str)) return Result{ .slice = str }; | 376 | if (form == .nfc and isLatin1Only(str)) return Result{ .slice = str }; |
| 418 | 377 | ||
| 419 | // Decompose first. | 378 | // Decompose first. |
| 420 | var dcps = if (form == .nfc) | 379 | var dcps = if (form == .nfc) |
| 421 | try self.nfxdCodePoints(allocator, str, .nfd) | 380 | try Normalize.nfxdCodePoints(allocator, str, .nfd) |
| 422 | else | 381 | else |
| 423 | try self.nfxdCodePoints(allocator, str, .nfkd); | 382 | try Normalize.nfxdCodePoints(allocator, str, .nfkd); |
| 424 | defer allocator.free(dcps); | 383 | defer allocator.free(dcps); |
| 425 | 384 | ||
| 426 | // Compose | 385 | // Compose |
| 427 | const tombstone = 0xe000; // Start of BMP Private Use Area | 386 | const tombstone = 0x1FFFF; // Convenient Cn noncharacter point |
| 428 | 387 | ||
| 429 | // Loop over all decomposed code points. | 388 | // Loop over all decomposed code points. |
| 430 | while (true) { | 389 | while (true) { |
| @@ -498,7 +457,7 @@ fn nfxc(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allo | |||
| 498 | if (!processed_hangul) { | 457 | if (!processed_hangul) { |
| 499 | // L, C are not Hangul, so check for primary composite | 458 | // L, C are not Hangul, so check for primary composite |
| 500 | // in the Unicode Character Database. | 459 | // in the Unicode Character Database. |
| 501 | if (self.canon_data.toNfc(.{ L, C })) |P| { | 460 | if (CanonData.toNfc(.{ L, C })) |P| { |
| 502 | // We have a primary composite P for L, C. | 461 | // We have a primary composite P for L, C. |
| 503 | // We must check if P is not in the Full | 462 | // We must check if P is not in the Full |
| 504 | // Composition Exclusions (FCX) list, | 463 | // Composition Exclusions (FCX) list, |
| @@ -534,10 +493,8 @@ fn nfxc(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allo | |||
| 534 | 493 | ||
| 535 | test "nfc" { | 494 | test "nfc" { |
| 536 | const allocator = testing.allocator; | 495 | const allocator = testing.allocator; |
| 537 | var n = try Normalize.init(allocator); | ||
| 538 | defer n.deinit(allocator); | ||
| 539 | 496 | ||
| 540 | const result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}"); | 497 | const result = try Normalize.nfc(allocator, "Complex char: \u{3D2}\u{301}"); |
| 541 | defer result.deinit(allocator); | 498 | defer result.deinit(allocator); |
| 542 | 499 | ||
| 543 | try testing.expectEqualStrings("Complex char: \u{3D3}", result.slice); | 500 | try testing.expectEqualStrings("Complex char: \u{3D3}", result.slice); |
| @@ -545,20 +502,18 @@ test "nfc" { | |||
| 545 | 502 | ||
| 546 | test "nfkc" { | 503 | test "nfkc" { |
| 547 | const allocator = testing.allocator; | 504 | const allocator = testing.allocator; |
| 548 | var n = try Normalize.init(allocator); | ||
| 549 | defer n.deinit(allocator); | ||
| 550 | 505 | ||
| 551 | const result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}"); | 506 | const result = try Normalize.nfkc(allocator, "Complex char: \u{03A5}\u{0301}"); |
| 552 | defer result.deinit(allocator); | 507 | defer result.deinit(allocator); |
| 553 | 508 | ||
| 554 | try testing.expectEqualStrings("Complex char: \u{038E}", result.slice); | 509 | try testing.expectEqualStrings("Complex char: \u{038E}", result.slice); |
| 555 | } | 510 | } |
| 556 | 511 | ||
| 557 | /// Tests for equality of `a` and `b` after normalizing to NFC. | 512 | /// Tests for equality of `a` and `b` after normalizing to NFC. |
| 558 | pub fn eql(self: Normalize, allocator: Allocator, a: []const u8, b: []const u8) !bool { | 513 | pub fn eql(allocator: Allocator, a: []const u8, b: []const u8) !bool { |
| 559 | const norm_result_a = try self.nfc(allocator, a); | 514 | const norm_result_a = try Normalize.nfc(allocator, a); |
| 560 | defer norm_result_a.deinit(allocator); | 515 | defer norm_result_a.deinit(allocator); |
| 561 | const norm_result_b = try self.nfc(allocator, b); | 516 | const norm_result_b = try Normalize.nfc(allocator, b); |
| 562 | defer norm_result_b.deinit(allocator); | 517 | defer norm_result_b.deinit(allocator); |
| 563 | 518 | ||
| 564 | return mem.eql(u8, norm_result_a.slice, norm_result_b.slice); | 519 | return mem.eql(u8, norm_result_a.slice, norm_result_b.slice); |
| @@ -566,11 +521,9 @@ pub fn eql(self: Normalize, allocator: Allocator, a: []const u8, b: []const u8) | |||
| 566 | 521 | ||
| 567 | test "eql" { | 522 | test "eql" { |
| 568 | const allocator = testing.allocator; | 523 | const allocator = testing.allocator; |
| 569 | var n = try Normalize.init(allocator); | ||
| 570 | defer n.deinit(allocator); | ||
| 571 | 524 | ||
| 572 | try testing.expect(try n.eql(allocator, "foé", "foe\u{0301}")); | 525 | try testing.expect(try Normalize.eql(allocator, "foé", "foe\u{0301}")); |
| 573 | try testing.expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}")); | 526 | try testing.expect(try Normalize.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}")); |
| 574 | } | 527 | } |
| 575 | 528 | ||
| 576 | /// Returns true if `str` only contains Latin-1 Supplement | 529 | /// Returns true if `str` only contains Latin-1 Supplement |
diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig index 50b8824..81ea90d 100644 --- a/src/unicode_tests.zig +++ b/src/unicode_tests.zig | |||
| @@ -5,9 +5,6 @@ test "Unicode normalization tests" { | |||
| 5 | defer arena.deinit(); | 5 | defer arena.deinit(); |
| 6 | const allocator = arena.allocator(); | 6 | const allocator = arena.allocator(); |
| 7 | 7 | ||
| 8 | const n = try Normalize.init(allocator); | ||
| 9 | defer n.deinit(allocator); | ||
| 10 | |||
| 11 | var reader = std.io.Reader.fixed(@embedFile("NormalizationTest.txt")); | 8 | var reader = std.io.Reader.fixed(@embedFile("NormalizationTest.txt")); |
| 12 | var cp_buf: [4]u8 = undefined; | 9 | var cp_buf: [4]u8 = undefined; |
| 13 | 10 | ||
| @@ -47,7 +44,7 @@ test "Unicode normalization tests" { | |||
| 47 | } | 44 | } |
| 48 | 45 | ||
| 49 | const want = w_buf.items; | 46 | const want = w_buf.items; |
| 50 | var got = try n.nfc(allocator, input); | 47 | var got = try Normalize.nfc(allocator, input); |
| 51 | defer got.deinit(allocator); | 48 | defer got.deinit(allocator); |
| 52 | 49 | ||
| 53 | try testing.expectEqualStrings(want, got.slice); | 50 | try testing.expectEqualStrings(want, got.slice); |
| @@ -64,7 +61,7 @@ test "Unicode normalization tests" { | |||
| 64 | } | 61 | } |
| 65 | 62 | ||
| 66 | const want = w_buf.items; | 63 | const want = w_buf.items; |
| 67 | var got = try n.nfd(allocator, input); | 64 | var got = try Normalize.nfd(allocator, input); |
| 68 | defer got.deinit(allocator); | 65 | defer got.deinit(allocator); |
| 69 | 66 | ||
| 70 | try testing.expectEqualStrings(want, got.slice); | 67 | try testing.expectEqualStrings(want, got.slice); |
| @@ -81,7 +78,7 @@ test "Unicode normalization tests" { | |||
| 81 | } | 78 | } |
| 82 | 79 | ||
| 83 | const want = w_buf.items; | 80 | const want = w_buf.items; |
| 84 | var got = try n.nfkc(allocator, input); | 81 | var got = try Normalize.nfkc(allocator, input); |
| 85 | defer got.deinit(allocator); | 82 | defer got.deinit(allocator); |
| 86 | 83 | ||
| 87 | try testing.expectEqualStrings(want, got.slice); | 84 | try testing.expectEqualStrings(want, got.slice); |
| @@ -98,7 +95,7 @@ test "Unicode normalization tests" { | |||
| 98 | } | 95 | } |
| 99 | 96 | ||
| 100 | const want = w_buf.items; | 97 | const want = w_buf.items; |
| 101 | const got = try n.nfkd(allocator, input); | 98 | const got = try Normalize.nfkd(allocator, input); |
| 102 | defer got.deinit(allocator); | 99 | defer got.deinit(allocator); |
| 103 | 100 | ||
| 104 | try testing.expectEqualStrings(want, got.slice); | 101 | try testing.expectEqualStrings(want, got.slice); |