From 95f9487f6a7bde2d7266399bdf6843b97cc1b301 Mon Sep 17 00:00:00 2001 From: Sam Atman Date: Thu, 5 Feb 2026 07:07:40 -0500 Subject: Base units do not allocate CanonData included. I may still sort out caseless matching without allocation, but that's a stretch goal. Closes #86 Closes #85 --- src/CaseFolding.zig | 40 ++++++++++++++++------------------------ 1 file changed, 16 insertions(+), 24 deletions(-) (limited to 'src/CaseFolding.zig') diff --git a/src/CaseFolding.zig b/src/CaseFolding.zig index 88f047c..d69cddc 100644 --- a/src/CaseFolding.zig +++ b/src/CaseFolding.zig @@ -100,14 +100,13 @@ fn isCwcfException(cp: u21) bool { /// comprehensive comparison possible, but slower than `canonCaselessMatch`. pub fn compatCaselessMatch( allocator: Allocator, - normalize: Normalize, a: []const u8, b: []const u8, ) Allocator.Error!bool { if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); // Process a - const nfd_a = try normalize.nfxdCodePoints(allocator, a, .nfd); + const nfd_a = try Normalize.nfxdCodePoints(allocator, a, .nfd); defer allocator.free(nfd_a); var need_free_cf_nfd_a = false; @@ -118,15 +117,15 @@ pub fn compatCaselessMatch( } defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a); - const nfkd_cf_nfd_a = try normalize.nfkdCodePoints(allocator, cf_nfd_a); + const nfkd_cf_nfd_a = try Normalize.nfkdCodePoints(allocator, cf_nfd_a); defer allocator.free(nfkd_cf_nfd_a); const cf_nfkd_cf_nfd_a = try CaseFolding.caseFoldAlloc(allocator, nfkd_cf_nfd_a); defer allocator.free(cf_nfkd_cf_nfd_a); - const nfkd_cf_nfkd_cf_nfd_a = try normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_a); + const nfkd_cf_nfkd_cf_nfd_a = try Normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_a); defer allocator.free(nfkd_cf_nfkd_cf_nfd_a); // Process b - const nfd_b = try normalize.nfxdCodePoints(allocator, b, .nfd); + const nfd_b = try Normalize.nfxdCodePoints(allocator, b, .nfd); defer allocator.free(nfd_b); var need_free_cf_nfd_b = false; @@ -137,11 +136,11 @@ pub fn compatCaselessMatch( } defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b); - const nfkd_cf_nfd_b = try normalize.nfkdCodePoints(allocator, cf_nfd_b); + const nfkd_cf_nfd_b = try Normalize.nfkdCodePoints(allocator, cf_nfd_b); defer allocator.free(nfkd_cf_nfd_b); const cf_nfkd_cf_nfd_b = try CaseFolding.caseFoldAlloc(allocator, nfkd_cf_nfd_b); defer allocator.free(cf_nfkd_cf_nfd_b); - const nfkd_cf_nfkd_cf_nfd_b = try normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_b); + const nfkd_cf_nfkd_cf_nfd_b = try Normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_b); defer allocator.free(nfkd_cf_nfkd_cf_nfd_b); return mem.eql(u21, nfkd_cf_nfkd_cf_nfd_a, nfkd_cf_nfkd_cf_nfd_b); @@ -176,31 +175,27 @@ test "caseFold" { test "compatCaselessMatch" { const allocator = testing.allocator; - var normalize = try Normalize.init(allocator); - defer normalize.deinit(allocator); - - try testing.expect(try compatCaselessMatch(allocator, normalize, "ascii only!", "ASCII Only!")); + try testing.expect(try compatCaselessMatch(allocator, "ascii only!", "ASCII Only!")); const a = "Héllo World! \u{3d3}"; const b = "He\u{301}llo World! \u{3a5}\u{301}"; - try testing.expect(try compatCaselessMatch(allocator, normalize, a, b)); + try testing.expect(try compatCaselessMatch(allocator, a, b)); const c = "He\u{301}llo World! \u{3d2}\u{301}"; - try testing.expect(try compatCaselessMatch(allocator, normalize, a, c)); + try testing.expect(try compatCaselessMatch(allocator, a, c)); } /// Performs canonical caseless string matching by decomposing to NFD. This is /// faster than `compatCaselessMatch`, but less comprehensive. pub fn canonCaselessMatch( allocator: Allocator, - normalize: Normalize, a: []const u8, b: []const u8, ) Allocator.Error!bool { if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b); // Process a - const nfd_a = try normalize.nfxdCodePoints(allocator, a, .nfd); + const nfd_a = try Normalize.nfxdCodePoints(allocator, a, .nfd); defer allocator.free(nfd_a); var need_free_cf_nfd_a = false; @@ -214,13 +209,13 @@ pub fn canonCaselessMatch( var need_free_nfd_cf_nfd_a = false; var nfd_cf_nfd_a = cf_nfd_a; if (!need_free_cf_nfd_a) { - nfd_cf_nfd_a = try normalize.nfdCodePoints(allocator, cf_nfd_a); + nfd_cf_nfd_a = try Normalize.nfdCodePoints(allocator, cf_nfd_a); need_free_nfd_cf_nfd_a = true; } defer if (need_free_nfd_cf_nfd_a) allocator.free(nfd_cf_nfd_a); // Process b - const nfd_b = try normalize.nfxdCodePoints(allocator, b, .nfd); + const nfd_b = try Normalize.nfxdCodePoints(allocator, b, .nfd); defer allocator.free(nfd_b); var need_free_cf_nfd_b = false; @@ -234,7 +229,7 @@ pub fn canonCaselessMatch( var need_free_nfd_cf_nfd_b = false; var nfd_cf_nfd_b = cf_nfd_b; if (!need_free_cf_nfd_b) { - nfd_cf_nfd_b = try normalize.nfdCodePoints(allocator, cf_nfd_b); + nfd_cf_nfd_b = try Normalize.nfdCodePoints(allocator, cf_nfd_b); need_free_nfd_cf_nfd_b = true; } defer if (need_free_nfd_cf_nfd_b) allocator.free(nfd_cf_nfd_b); @@ -245,17 +240,14 @@ pub fn canonCaselessMatch( test "canonCaselessMatch" { const allocator = testing.allocator; - var normalize = try Normalize.init(allocator); - defer normalize.deinit(allocator); - - try testing.expect(try canonCaselessMatch(allocator, normalize, "ascii only!", "ASCII Only!")); + try testing.expect(try canonCaselessMatch(allocator, "ascii only!", "ASCII Only!")); const a = "Héllo World! \u{3d3}"; const b = "He\u{301}llo World! \u{3a5}\u{301}"; - try testing.expect(!try canonCaselessMatch(allocator, normalize, a, b)); + try testing.expect(!try canonCaselessMatch(allocator, a, b)); const c = "He\u{301}llo World! \u{3d2}\u{301}"; - try testing.expect(try canonCaselessMatch(allocator, normalize, a, c)); + try testing.expect(try canonCaselessMatch(allocator, a, c)); } const std = @import("std"); -- cgit v1.2.3