From 95f9487f6a7bde2d7266399bdf6843b97cc1b301 Mon Sep 17 00:00:00 2001 From: Sam Atman Date: Thu, 5 Feb 2026 07:07:40 -0500 Subject: Base units do not allocate CanonData included. I may still sort out caseless matching without allocation, but that's a stretch goal. Closes #86 Closes #85 --- src/CanonData.zig | 57 ++++++-------------- src/CaseFolding.zig | 40 ++++++-------- src/Normalize.zig | 143 +++++++++++++++++--------------------------------- src/unicode_tests.zig | 11 ++-- 4 files changed, 85 insertions(+), 166 deletions(-) diff --git a/src/CanonData.zig b/src/CanonData.zig index c972534..5c1ffa6 100644 --- a/src/CanonData.zig +++ b/src/CanonData.zig @@ -1,13 +1,23 @@ //! Canonicalization Data -s1: []const u16 = undefined, -s2: []const @import("canon").Canonicalization = undefined, -nfc: std.AutoHashMapUnmanaged([2]u21, u21), +const Data = struct { + s1: []const u16 = undefined, + s2: []const @import("canon").Canonicalization = undefined, +}; + +const canon_data = canon_data: { + const canon_ = @import("canon"); + break :canon_data Data{ + .s1 = &canon_.s1, + .s2 = &canon_.s2, + }; +}; const CanonData = @This(); // There's a bug here, which is down to how static u21 vs. runtime are handled, -// the "unique representation" claim is not working out. So we do this: +// the "unique representation" claim is not working out. AutoHash casts to bytes, +// and that won't fly. So we do this: const Context = struct { pub fn hash(_: Context, cps: [2]u21) u64 { @@ -22,47 +32,14 @@ const Context = struct { const c_map = comptime_map.ComptimeHashMap([2]u21, u21, Context, @import("canon").c_map); -pub fn init(allocator: mem.Allocator) !CanonData { - var cdata = CanonData{ - .nfc = .empty, - }; - errdefer cdata.deinit(allocator); - - const data = @import("canon"); - cdata.s1 = &data.s1; - cdata.s2 = &data.s2; - var count: usize = 0; - for (data.composite) |cp| { - count += 1; - const cps = cdata.toNfd(cp); - std.debug.assert(cps.len == 2); - try cdata.nfc.put(allocator, cps[0..2].*, cp); - } - - // var keys = cdata.nfc.keyIterator(); - // while (keys.next()) |key| { - // const c32: [2]u32 = .{ key[0], key[1] }; - // if (c_map.get(c32)) |_| { - // std.debug.print("got", .{}); - // } - // } - - return cdata; -} - -pub fn deinit(cdata: *CanonData, allocator: mem.Allocator) void { - cdata.nfc.deinit(allocator); -} - /// Returns canonical decomposition for `cp`. -pub fn toNfd(cdata: *const CanonData, cp: u21) []const u21 { - const canon = &cdata.s2[cdata.s1[cp >> 8] + (cp & 0xff)]; +pub fn toNfd(cp: u21) []const u21 { + const canon = &canon_data.s2[canon_data.s1[cp >> 8] + (cp & 0xff)]; return canon.cps[0..canon.len]; } // Returns the primary composite for the codepoints in `cp`. -pub fn toNfc(cdata: *const CanonData, cps: [2]u21) ?u21 { - _ = cdata; +pub fn toNfc(cps: [2]u21) ?u21 { if (c_map.get(cps)) |cpp| { return cpp.*; } else { diff --git a/src/CaseFolding.zig b/src/CaseFolding.zig index 88f047c..d69cddc 100644 --- a/src/CaseFolding.zig +++ b/src/CaseFolding.zig @@ -100,14 +100,13 @@ fn isCwcfException(cp: u21) bool { /// comprehensive comparison possible, but slower than `canonCaselessMatch`. pub fn compatCaselessMatch( allocator: Allocator, - normalize: Normalize, a: []const u8, b: []const u8, ) Allocator.Error!bool { if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); // Process a - const nfd_a = try normalize.nfxdCodePoints(allocator, a, .nfd); + const nfd_a = try Normalize.nfxdCodePoints(allocator, a, .nfd); defer allocator.free(nfd_a); var need_free_cf_nfd_a = false; @@ -118,15 +117,15 @@ pub fn compatCaselessMatch( } defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a); - const nfkd_cf_nfd_a = try normalize.nfkdCodePoints(allocator, cf_nfd_a); + const nfkd_cf_nfd_a = try Normalize.nfkdCodePoints(allocator, cf_nfd_a); defer allocator.free(nfkd_cf_nfd_a); const cf_nfkd_cf_nfd_a = try CaseFolding.caseFoldAlloc(allocator, nfkd_cf_nfd_a); defer allocator.free(cf_nfkd_cf_nfd_a); - const nfkd_cf_nfkd_cf_nfd_a = try normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_a); + const nfkd_cf_nfkd_cf_nfd_a = try Normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_a); defer allocator.free(nfkd_cf_nfkd_cf_nfd_a); // Process b - const nfd_b = try normalize.nfxdCodePoints(allocator, b, .nfd); + const nfd_b = try Normalize.nfxdCodePoints(allocator, b, .nfd); defer allocator.free(nfd_b); var need_free_cf_nfd_b = false; @@ -137,11 +136,11 @@ pub fn compatCaselessMatch( } defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b); - const nfkd_cf_nfd_b = try normalize.nfkdCodePoints(allocator, cf_nfd_b); + const nfkd_cf_nfd_b = try Normalize.nfkdCodePoints(allocator, cf_nfd_b); defer allocator.free(nfkd_cf_nfd_b); const cf_nfkd_cf_nfd_b = try CaseFolding.caseFoldAlloc(allocator, nfkd_cf_nfd_b); defer allocator.free(cf_nfkd_cf_nfd_b); - const nfkd_cf_nfkd_cf_nfd_b = try normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_b); + const nfkd_cf_nfkd_cf_nfd_b = try Normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_b); defer allocator.free(nfkd_cf_nfkd_cf_nfd_b); return mem.eql(u21, nfkd_cf_nfkd_cf_nfd_a, nfkd_cf_nfkd_cf_nfd_b); @@ -176,31 +175,27 @@ test "caseFold" { test "compatCaselessMatch" { const allocator = testing.allocator; - var normalize = try Normalize.init(allocator); - defer normalize.deinit(allocator); - - try testing.expect(try compatCaselessMatch(allocator, normalize, "ascii only!", "ASCII Only!")); + try testing.expect(try compatCaselessMatch(allocator, "ascii only!", "ASCII Only!")); const a = "Héllo World! \u{3d3}"; const b = "He\u{301}llo World! \u{3a5}\u{301}"; - try testing.expect(try compatCaselessMatch(allocator, normalize, a, b)); + try testing.expect(try compatCaselessMatch(allocator, a, b)); const c = "He\u{301}llo World! \u{3d2}\u{301}"; - try testing.expect(try compatCaselessMatch(allocator, normalize, a, c)); + try testing.expect(try compatCaselessMatch(allocator, a, c)); } /// Performs canonical caseless string matching by decomposing to NFD. This is /// faster than `compatCaselessMatch`, but less comprehensive. pub fn canonCaselessMatch( allocator: Allocator, - normalize: Normalize, a: []const u8, b: []const u8, ) Allocator.Error!bool { if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); // Process a - const nfd_a = try normalize.nfxdCodePoints(allocator, a, .nfd); + const nfd_a = try Normalize.nfxdCodePoints(allocator, a, .nfd); defer allocator.free(nfd_a); var need_free_cf_nfd_a = false; @@ -214,13 +209,13 @@ pub fn canonCaselessMatch( var need_free_nfd_cf_nfd_a = false; var nfd_cf_nfd_a = cf_nfd_a; if (!need_free_cf_nfd_a) { - nfd_cf_nfd_a = try normalize.nfdCodePoints(allocator, cf_nfd_a); + nfd_cf_nfd_a = try Normalize.nfdCodePoints(allocator, cf_nfd_a); need_free_nfd_cf_nfd_a = true; } defer if (need_free_nfd_cf_nfd_a) allocator.free(nfd_cf_nfd_a); // Process b - const nfd_b = try normalize.nfxdCodePoints(allocator, b, .nfd); + const nfd_b = try Normalize.nfxdCodePoints(allocator, b, .nfd); defer allocator.free(nfd_b); var need_free_cf_nfd_b = false; @@ -234,7 +229,7 @@ pub fn canonCaselessMatch( var need_free_nfd_cf_nfd_b = false; var nfd_cf_nfd_b = cf_nfd_b; if (!need_free_cf_nfd_b) { - nfd_cf_nfd_b = try normalize.nfdCodePoints(allocator, cf_nfd_b); + nfd_cf_nfd_b = try Normalize.nfdCodePoints(allocator, cf_nfd_b); need_free_nfd_cf_nfd_b = true; } defer if (need_free_nfd_cf_nfd_b) allocator.free(nfd_cf_nfd_b); @@ -245,17 +240,14 @@ pub fn canonCaselessMatch( test "canonCaselessMatch" { const allocator = testing.allocator; - var normalize = try Normalize.init(allocator); - defer normalize.deinit(allocator); - - try testing.expect(try canonCaselessMatch(allocator, normalize, "ascii only!", "ASCII Only!")); + try testing.expect(try canonCaselessMatch(allocator, "ascii only!", "ASCII Only!")); const a = "Héllo World! \u{3d3}"; const b = "He\u{301}llo World! \u{3a5}\u{301}"; - try testing.expect(!try canonCaselessMatch(allocator, normalize, a, b)); + try testing.expect(!try canonCaselessMatch(allocator, a, b)); const c = "He\u{301}llo World! \u{3d2}\u{301}"; - try testing.expect(try canonCaselessMatch(allocator, normalize, a, c)); + try testing.expect(try canonCaselessMatch(allocator, a, c)); } const std = @import("std"); diff --git a/src/Normalize.zig b/src/Normalize.zig index 3191a8c..865318f 100644 --- a/src/Normalize.zig +++ b/src/Normalize.zig @@ -2,25 +2,8 @@ //! Unicode Normalization. You can normalize strings into NFC, //! NFKC, NFD, and NFKD normalization forms. -canon_data: CanonData = undefined, - const Normalize = @This(); -pub fn init(allocator: Allocator) !Normalize { - var norm: Normalize = undefined; - try norm.setup(allocator); - return norm; -} - -pub fn setup(self: *Normalize, allocator: Allocator) !void { - self.canon_data = try CanonData.init(allocator); -} - -pub fn deinit(norm: *const Normalize, allocator: Allocator) void { - const mut_norm = @constCast(norm); - mut_norm.canon_data.deinit(allocator); -} - const SBase: u21 = 0xAC00; const LBase: u21 = 0x1100; const VBase: u21 = 0x1161; @@ -91,12 +74,12 @@ const Decomp = struct { }; // `mapping` retrieves the decomposition mapping for a code point as per the UCD. -fn mapping(self: Normalize, cp: u21, form: Form) Decomp { +fn mapping(cp: u21, form: Form) Decomp { var dc = Decomp{}; switch (form) { .nfd => { - dc.cps = self.canon_data.toNfd(cp); + dc.cps = CanonData.toNfd(cp); if (dc.cps.len != 0) dc.form = .nfd; }, @@ -105,7 +88,7 @@ fn mapping(self: Normalize, cp: u21, form: Form) Decomp { if (dc.cps.len != 0) { dc.form = .nfkd; } else { - dc.cps = self.canon_data.toNfd(cp); + dc.cps = CanonData.toNfd(cp); if (dc.cps.len != 0) dc.form = .nfkd; } }, @@ -117,12 +100,7 @@ fn mapping(self: Normalize, cp: u21, form: Form) Decomp { } // `decompose` a code point to the specified normalization form, which should be either `.nfd` or `.nfkd`. -fn decompose( - self: Normalize, - cp: u21, - form: Form, - buf: []u21, -) Decomp { +fn decompose(cp: u21, form: Form, buf: []u21) Decomp { // ASCII if (cp < 128) return .{}; @@ -149,7 +127,7 @@ fn decompose( // Look at previous code point in work queue. work_index -= 1; const next = work[work_index]; - const m = self.mapping(next, form); + const m = Normalize.mapping(next, form); // No more of decompositions for this code point. if (m.form == .same) { @@ -175,44 +153,41 @@ fn decompose( } test "decompose" { - const allocator = testing.allocator; - var n = try Normalize.init(allocator); - defer n.deinit(allocator); var buf: [18]u21 = undefined; - var dc = n.decompose('é', .nfd, &buf); + var dc = Normalize.decompose('é', .nfd, &buf); try testing.expect(dc.form == .nfd); try testing.expectEqualSlices(u21, &[_]u21{ 'e', '\u{301}' }, dc.cps[0..2]); - dc = n.decompose('\u{1e0a}', .nfd, &buf); + dc = Normalize.decompose('\u{1e0a}', .nfd, &buf); try testing.expect(dc.form == .nfd); try testing.expectEqualSlices(u21, &[_]u21{ 'D', '\u{307}' }, dc.cps[0..2]); - dc = n.decompose('\u{1e0a}', .nfkd, &buf); + dc = Normalize.decompose('\u{1e0a}', .nfkd, &buf); try testing.expect(dc.form == .nfkd); try testing.expectEqualSlices(u21, &[_]u21{ 'D', '\u{307}' }, dc.cps[0..2]); - dc = n.decompose('\u{3189}', .nfd, &buf); + dc = Normalize.decompose('\u{3189}', .nfd, &buf); try testing.expect(dc.form == .same); try testing.expect(dc.cps.len == 0); - dc = n.decompose('\u{3189}', .nfkd, &buf); + dc = Normalize.decompose('\u{3189}', .nfkd, &buf); try testing.expect(dc.form == .nfkd); try testing.expectEqualSlices(u21, &[_]u21{'\u{1188}'}, dc.cps[0..1]); - dc = n.decompose('\u{ace1}', .nfd, &buf); + dc = Normalize.decompose('\u{ace1}', .nfd, &buf); try testing.expect(dc.form == .nfd); try testing.expectEqualSlices(u21, &[_]u21{ '\u{1100}', '\u{1169}', '\u{11a8}' }, dc.cps[0..3]); - dc = n.decompose('\u{ace1}', .nfkd, &buf); + dc = Normalize.decompose('\u{ace1}', .nfkd, &buf); try testing.expect(dc.form == .nfd); try testing.expectEqualSlices(u21, &[_]u21{ '\u{1100}', '\u{1169}', '\u{11a8}' }, dc.cps[0..3]); - dc = n.decompose('\u{3d3}', .nfd, &buf); + dc = Normalize.decompose('\u{3d3}', .nfd, &buf); try testing.expect(dc.form == .nfd); try testing.expectEqualSlices(u21, &[_]u21{ '\u{3d2}', '\u{301}' }, dc.cps[0..2]); - dc = n.decompose('\u{3d3}', .nfkd, &buf); + dc = Normalize.decompose('\u{3d3}', .nfkd, &buf); try testing.expect(dc.form == .nfkd); try testing.expectEqualSlices(u21, &[_]u21{ '\u{3a5}', '\u{301}' }, dc.cps[0..2]); } @@ -231,8 +206,8 @@ pub const Result = struct { return .{ .allocated = true, .slice = try allocator.dupe(u8, result.slice) }; } - pub fn deinit(self: *const Result, allocator: Allocator) void { - if (self.allocated) allocator.free(self.slice); + pub fn deinit(result: *const Result, allocator: Allocator) void { + if (result.allocated) allocator.free(result.slice); } }; @@ -252,16 +227,16 @@ fn canonicalSort(cps: []u21) void { } /// Normalize `str` to NFD. -pub fn nfd(self: Normalize, allocator: Allocator, str: []const u8) Allocator.Error!Result { - return self.nfxd(allocator, str, .nfd); +pub fn nfd(allocator: Allocator, str: []const u8) Allocator.Error!Result { + return Normalize.nfxd(allocator, str, .nfd); } /// Normalize `str` to NFKD. -pub fn nfkd(self: Normalize, allocator: Allocator, str: []const u8) Allocator.Error!Result { - return self.nfxd(allocator, str, .nfkd); +pub fn nfkd(allocator: Allocator, str: []const u8) Allocator.Error!Result { + return Normalize.nfxd(allocator, str, .nfkd); } -pub fn nfxdCodePoints(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allocator.Error![]u21 { +pub fn nfxdCodePoints(allocator: Allocator, str: []const u8, form: Form) Allocator.Error![]u21 { var dcp_list = std.array_list.Managed(u21).init(allocator); defer dcp_list.deinit(); @@ -269,7 +244,7 @@ pub fn nfxdCodePoints(self: Normalize, allocator: Allocator, str: []const u8, fo var dc_buf: [18]u21 = undefined; while (cp_iter.next()) |cp| { - const dc = self.decompose(cp.code, form, &dc_buf); + const dc = Normalize.decompose(cp.code, form, &dc_buf); if (dc.form == .same) { try dcp_list.append(cp.code); } else { @@ -282,11 +257,11 @@ pub fn nfxdCodePoints(self: Normalize, allocator: Allocator, str: []const u8, fo return try dcp_list.toOwnedSlice(); } -fn nfxd(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allocator.Error!Result { +fn nfxd(allocator: Allocator, str: []const u8, form: Form) Allocator.Error!Result { // Quick checks. if (ascii.isAsciiOnly(str)) return Result{ .slice = str }; - const dcps = try self.nfxdCodePoints(allocator, str, form); + const dcps = try Normalize.nfxdCodePoints(allocator, str, form); defer allocator.free(dcps); var dstr_list = std.array_list.Managed(u8).init(allocator); @@ -303,10 +278,8 @@ fn nfxd(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allo test "nfd ASCII / no-alloc" { const allocator = testing.allocator; - var n = try Normalize.init(allocator); - defer n.deinit(allocator); - const result = try n.nfd(allocator, "Hello World!"); + const result = try Normalize.nfd(allocator, "Hello World!"); defer result.deinit(allocator); try testing.expectEqualStrings("Hello World!", result.slice); @@ -314,10 +287,8 @@ test "nfd ASCII / no-alloc" { test "nfd !ASCII / alloc" { const allocator = testing.allocator; - var n = try Normalize.init(allocator); - defer n.deinit(allocator); - const result = try n.nfd(allocator, "Héllo World! \u{3d3}"); + const result = try Normalize.nfd(allocator, "Héllo World! \u{3d3}"); defer result.deinit(allocator); try testing.expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", result.slice); @@ -325,10 +296,8 @@ test "nfd !ASCII / alloc" { test "nfkd ASCII / no-alloc" { const allocator = testing.allocator; - var n = try Normalize.init(allocator); - defer n.deinit(allocator); - const result = try n.nfkd(allocator, "Hello World!"); + const result = try Normalize.nfkd(allocator, "Hello World!"); defer result.deinit(allocator); try testing.expectEqualStrings("Hello World!", result.slice); @@ -336,27 +305,21 @@ test "nfkd ASCII / no-alloc" { test "nfkd !ASCII / alloc" { const allocator = testing.allocator; - var n = try Normalize.init(allocator); - defer n.deinit(allocator); - const result = try n.nfkd(allocator, "Héllo World! \u{3d3}"); + const result = try Normalize.nfkd(allocator, "Héllo World! \u{3d3}"); defer result.deinit(allocator); try testing.expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", result.slice); } -pub fn nfdCodePoints( - self: Normalize, - allocator: Allocator, - cps: []const u21, -) Allocator.Error![]u21 { +pub fn nfdCodePoints(allocator: Allocator, cps: []const u21) Allocator.Error![]u21 { var dcp_list = std.array_list.Managed(u21).init(allocator); defer dcp_list.deinit(); var dc_buf: [18]u21 = undefined; for (cps) |cp| { - const dc = self.decompose(cp, .nfd, &dc_buf); + const dc = Normalize.decompose(cp, .nfd, &dc_buf); if (dc.form == .same) { try dcp_list.append(cp); @@ -370,18 +333,14 @@ pub fn nfdCodePoints( return try dcp_list.toOwnedSlice(); } -pub fn nfkdCodePoints( - self: Normalize, - allocator: Allocator, - cps: []const u21, -) Allocator.Error![]u21 { +pub fn nfkdCodePoints(allocator: Allocator, cps: []const u21) Allocator.Error![]u21 { var dcp_list = std.array_list.Managed(u21).init(allocator); defer dcp_list.deinit(); var dc_buf: [18]u21 = undefined; for (cps) |cp| { - const dc = self.decompose(cp, .nfkd, &dc_buf); + const dc = Normalize.decompose(cp, .nfkd, &dc_buf); if (dc.form == .same) { try dcp_list.append(cp); @@ -402,29 +361,29 @@ fn isHangul(cp: u21) bool { } /// Normalizes `str` to NFC. -pub fn nfc(self: Normalize, allocator: Allocator, str: []const u8) Allocator.Error!Result { - return self.nfxc(allocator, str, .nfc); +pub fn nfc(allocator: Allocator, str: []const u8) Allocator.Error!Result { + return Normalize.nfxc(allocator, str, .nfc); } /// Normalizes `str` to NFKC. -pub fn nfkc(self: Normalize, allocator: Allocator, str: []const u8) Allocator.Error!Result { - return self.nfxc(allocator, str, .nfkc); +pub fn nfkc(allocator: Allocator, str: []const u8) Allocator.Error!Result { + return Normalize.nfxc(allocator, str, .nfkc); } -fn nfxc(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allocator.Error!Result { +fn nfxc(allocator: Allocator, str: []const u8, form: Form) Allocator.Error!Result { // Quick checks. if (ascii.isAsciiOnly(str)) return Result{ .slice = str }; if (form == .nfc and isLatin1Only(str)) return Result{ .slice = str }; // Decompose first. var dcps = if (form == .nfc) - try self.nfxdCodePoints(allocator, str, .nfd) + try Normalize.nfxdCodePoints(allocator, str, .nfd) else - try self.nfxdCodePoints(allocator, str, .nfkd); + try Normalize.nfxdCodePoints(allocator, str, .nfkd); defer allocator.free(dcps); // Compose - const tombstone = 0xe000; // Start of BMP Private Use Area + const tombstone = 0x1FFFF; // Convenient Cn noncharacter point // Loop over all decomposed code points. while (true) { @@ -498,7 +457,7 @@ fn nfxc(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allo if (!processed_hangul) { // L, C are not Hangul, so check for primary composite // in the Unicode Character Database. - if (self.canon_data.toNfc(.{ L, C })) |P| { + if (CanonData.toNfc(.{ L, C })) |P| { // We have a primary composite P for L, C. // We must check if P is not in the Full // Composition Exclusions (FCX) list, @@ -534,10 +493,8 @@ fn nfxc(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allo test "nfc" { const allocator = testing.allocator; - var n = try Normalize.init(allocator); - defer n.deinit(allocator); - const result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}"); + const result = try Normalize.nfc(allocator, "Complex char: \u{3D2}\u{301}"); defer result.deinit(allocator); try testing.expectEqualStrings("Complex char: \u{3D3}", result.slice); @@ -545,20 +502,18 @@ test "nfc" { test "nfkc" { const allocator = testing.allocator; - var n = try Normalize.init(allocator); - defer n.deinit(allocator); - const result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}"); + const result = try Normalize.nfkc(allocator, "Complex char: \u{03A5}\u{0301}"); defer result.deinit(allocator); try testing.expectEqualStrings("Complex char: \u{038E}", result.slice); } /// Tests for equality of `a` and `b` after normalizing to NFC. -pub fn eql(self: Normalize, allocator: Allocator, a: []const u8, b: []const u8) !bool { - const norm_result_a = try self.nfc(allocator, a); +pub fn eql(allocator: Allocator, a: []const u8, b: []const u8) !bool { + const norm_result_a = try Normalize.nfc(allocator, a); defer norm_result_a.deinit(allocator); - const norm_result_b = try self.nfc(allocator, b); + const norm_result_b = try Normalize.nfc(allocator, b); defer norm_result_b.deinit(allocator); return mem.eql(u8, norm_result_a.slice, norm_result_b.slice); @@ -566,11 +521,9 @@ pub fn eql(self: Normalize, allocator: Allocator, a: []const u8, b: []const u8) test "eql" { const allocator = testing.allocator; - var n = try Normalize.init(allocator); - defer n.deinit(allocator); - try testing.expect(try n.eql(allocator, "foé", "foe\u{0301}")); - try testing.expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}")); + try testing.expect(try Normalize.eql(allocator, "foé", "foe\u{0301}")); + try testing.expect(try Normalize.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}")); } /// Returns true if `str` only contains Latin-1 Supplement diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig index 50b8824..81ea90d 100644 --- a/src/unicode_tests.zig +++ b/src/unicode_tests.zig @@ -5,9 +5,6 @@ test "Unicode normalization tests" { defer arena.deinit(); const allocator = arena.allocator(); - const n = try Normalize.init(allocator); - defer n.deinit(allocator); - var reader = std.io.Reader.fixed(@embedFile("NormalizationTest.txt")); var cp_buf: [4]u8 = undefined; @@ -47,7 +44,7 @@ test "Unicode normalization tests" { } const want = w_buf.items; - var got = try n.nfc(allocator, input); + var got = try Normalize.nfc(allocator, input); defer got.deinit(allocator); try testing.expectEqualStrings(want, got.slice); @@ -64,7 +61,7 @@ test "Unicode normalization tests" { } const want = w_buf.items; - var got = try n.nfd(allocator, input); + var got = try Normalize.nfd(allocator, input); defer got.deinit(allocator); try testing.expectEqualStrings(want, got.slice); @@ -81,7 +78,7 @@ test "Unicode normalization tests" { } const want = w_buf.items; - var got = try n.nfkc(allocator, input); + var got = try Normalize.nfkc(allocator, input); defer got.deinit(allocator); try testing.expectEqualStrings(want, got.slice); @@ -98,7 +95,7 @@ test "Unicode normalization tests" { } const want = w_buf.items; - const got = try n.nfkd(allocator, input); + const got = try Normalize.nfkd(allocator, input); defer got.deinit(allocator); try testing.expectEqualStrings(want, got.slice); -- cgit v1.2.3