From 95f9487f6a7bde2d7266399bdf6843b97cc1b301 Mon Sep 17 00:00:00 2001
From: Sam Atman
Date: Thu, 5 Feb 2026 07:07:40 -0500
Subject: Base units do not allocate

This includes CanonData, whose tables are now a file-level constant
rather than state built by a runtime init.  I may still sort out caseless
matching without allocation, but that's a stretch goal.

Closes #86
Closes #85
---
 src/CanonData.zig     |  57 ++++++--------------
 src/CaseFolding.zig   |  40 ++++++--------
 src/Normalize.zig     | 143 +++++++++++++++++---------------------------------
 src/unicode_tests.zig |  11 ++--
 4 files changed, 85 insertions(+), 166 deletions(-)

diff --git a/src/CanonData.zig b/src/CanonData.zig
index c972534..5c1ffa6 100644
--- a/src/CanonData.zig
+++ b/src/CanonData.zig
@@ -1,13 +1,23 @@
 //! Canonicalization Data
 
-s1: []const u16 = undefined,
-s2: []const @import("canon").Canonicalization = undefined,
-nfc: std.AutoHashMapUnmanaged([2]u21, u21),
+const Data = struct {
+    s1: []const u16 = undefined,
+    s2: []const @import("canon").Canonicalization = undefined,
+};
+
+const canon_data = canon_data: {
+    const canon_ = @import("canon");
+    break :canon_data Data{
+        .s1 = &canon_.s1,
+        .s2 = &canon_.s2,
+    };
+};
 
 const CanonData = @This();
 
 // There's a bug here, which is down to how static u21 vs. runtime are handled,
-// the "unique representation" claim is not working out.  So we do this:
+// the "unique representation" claim is not working out.  AutoHash hashes the
+// key as raw bytes, and that won't fly.  So we use a custom Context:
 
 const Context = struct {
     pub fn hash(_: Context, cps: [2]u21) u64 {
@@ -22,47 +32,14 @@ const Context = struct {
 
 const c_map = comptime_map.ComptimeHashMap([2]u21, u21, Context, @import("canon").c_map);
 
-pub fn init(allocator: mem.Allocator) !CanonData {
-    var cdata = CanonData{
-        .nfc = .empty,
-    };
-    errdefer cdata.deinit(allocator);
-
-    const data = @import("canon");
-    cdata.s1 = &data.s1;
-    cdata.s2 = &data.s2;
-    var count: usize = 0;
-    for (data.composite) |cp| {
-        count += 1;
-        const cps = cdata.toNfd(cp);
-        std.debug.assert(cps.len == 2);
-        try cdata.nfc.put(allocator, cps[0..2].*, cp);
-    }
-
-    // var keys = cdata.nfc.keyIterator();
-    // while (keys.next()) |key| {
-    //     const c32: [2]u32 = .{ key[0], key[1] };
-    //     if (c_map.get(c32)) |_| {
-    //         std.debug.print("got", .{});
-    //     }
-    // }
-
-    return cdata;
-}
-
-pub fn deinit(cdata: *CanonData, allocator: mem.Allocator) void {
-    cdata.nfc.deinit(allocator);
-}
-
 /// Returns canonical decomposition for `cp`.
-pub fn toNfd(cdata: *const CanonData, cp: u21) []const u21 {
-    const canon = &cdata.s2[cdata.s1[cp >> 8] + (cp & 0xff)];
+pub fn toNfd(cp: u21) []const u21 {
+    const canon = &canon_data.s2[canon_data.s1[cp >> 8] + (cp & 0xff)];
     return canon.cps[0..canon.len];
 }
 
 // Returns the primary composite for the codepoints in `cp`.
-pub fn toNfc(cdata: *const CanonData, cps: [2]u21) ?u21 {
-    _ = cdata;
+pub fn toNfc(cps: [2]u21) ?u21 {
     if (c_map.get(cps)) |cpp| {
         return cpp.*;
     } else {
diff --git a/src/CaseFolding.zig b/src/CaseFolding.zig
index 88f047c..d69cddc 100644
--- a/src/CaseFolding.zig
+++ b/src/CaseFolding.zig
@@ -100,14 +100,13 @@ fn isCwcfException(cp: u21) bool {
 /// comprehensive comparison possible, but slower than `canonCaselessMatch`.
 pub fn compatCaselessMatch(
     allocator: Allocator,
-    normalize: Normalize,
     a: []const u8,
     b: []const u8,
 ) Allocator.Error!bool {
     if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b);
 
     // Process a
-    const nfd_a = try normalize.nfxdCodePoints(allocator, a, .nfd);
+    const nfd_a = try Normalize.nfxdCodePoints(allocator, a, .nfd);
     defer allocator.free(nfd_a);
 
     var need_free_cf_nfd_a = false;
@@ -118,15 +117,15 @@ pub fn compatCaselessMatch(
     }
     defer if (need_free_cf_nfd_a) allocator.free(cf_nfd_a);
 
-    const nfkd_cf_nfd_a = try normalize.nfkdCodePoints(allocator, cf_nfd_a);
+    const nfkd_cf_nfd_a = try Normalize.nfkdCodePoints(allocator, cf_nfd_a);
     defer allocator.free(nfkd_cf_nfd_a);
     const cf_nfkd_cf_nfd_a = try CaseFolding.caseFoldAlloc(allocator, nfkd_cf_nfd_a);
     defer allocator.free(cf_nfkd_cf_nfd_a);
-    const nfkd_cf_nfkd_cf_nfd_a = try normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_a);
+    const nfkd_cf_nfkd_cf_nfd_a = try Normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_a);
     defer allocator.free(nfkd_cf_nfkd_cf_nfd_a);
 
     // Process b
-    const nfd_b = try normalize.nfxdCodePoints(allocator, b, .nfd);
+    const nfd_b = try Normalize.nfxdCodePoints(allocator, b, .nfd);
     defer allocator.free(nfd_b);
 
     var need_free_cf_nfd_b = false;
@@ -137,11 +136,11 @@ pub fn compatCaselessMatch(
     }
     defer if (need_free_cf_nfd_b) allocator.free(cf_nfd_b);
 
-    const nfkd_cf_nfd_b = try normalize.nfkdCodePoints(allocator, cf_nfd_b);
+    const nfkd_cf_nfd_b = try Normalize.nfkdCodePoints(allocator, cf_nfd_b);
     defer allocator.free(nfkd_cf_nfd_b);
     const cf_nfkd_cf_nfd_b = try CaseFolding.caseFoldAlloc(allocator, nfkd_cf_nfd_b);
     defer allocator.free(cf_nfkd_cf_nfd_b);
-    const nfkd_cf_nfkd_cf_nfd_b = try normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_b);
+    const nfkd_cf_nfkd_cf_nfd_b = try Normalize.nfkdCodePoints(allocator, cf_nfkd_cf_nfd_b);
     defer allocator.free(nfkd_cf_nfkd_cf_nfd_b);
 
     return mem.eql(u21, nfkd_cf_nfkd_cf_nfd_a, nfkd_cf_nfkd_cf_nfd_b);
@@ -176,31 +175,27 @@ test "caseFold" {
 test "compatCaselessMatch" {
     const allocator = testing.allocator;
 
-    var normalize = try Normalize.init(allocator);
-    defer normalize.deinit(allocator);
-
-    try testing.expect(try compatCaselessMatch(allocator, normalize, "ascii only!", "ASCII Only!"));
+    try testing.expect(try compatCaselessMatch(allocator, "ascii only!", "ASCII Only!"));
 
     const a = "Héllo World! \u{3d3}";
     const b = "He\u{301}llo World! \u{3a5}\u{301}";
-    try testing.expect(try compatCaselessMatch(allocator, normalize, a, b));
+    try testing.expect(try compatCaselessMatch(allocator, a, b));
 
     const c = "He\u{301}llo World! \u{3d2}\u{301}";
-    try testing.expect(try compatCaselessMatch(allocator, normalize, a, c));
+    try testing.expect(try compatCaselessMatch(allocator, a, c));
 }
 
 /// Performs canonical caseless string matching by decomposing to NFD. This is
 /// faster than `compatCaselessMatch`, but less comprehensive.
 pub fn canonCaselessMatch(
     allocator: Allocator,
-    normalize: Normalize,
     a: []const u8,
     b: []const u8,
 ) Allocator.Error!bool {
     if (ascii.isAsciiOnly(a) and ascii.isAsciiOnly(b)) return std.ascii.eqlIgnoreCase(a, b);
 
     // Process a
-    const nfd_a = try normalize.nfxdCodePoints(allocator, a, .nfd);
+    const nfd_a = try Normalize.nfxdCodePoints(allocator, a, .nfd);
     defer allocator.free(nfd_a);
 
     var need_free_cf_nfd_a = false;
@@ -214,13 +209,13 @@ pub fn canonCaselessMatch(
     var need_free_nfd_cf_nfd_a = false;
     var nfd_cf_nfd_a = cf_nfd_a;
     if (!need_free_cf_nfd_a) {
-        nfd_cf_nfd_a = try normalize.nfdCodePoints(allocator, cf_nfd_a);
+        nfd_cf_nfd_a = try Normalize.nfdCodePoints(allocator, cf_nfd_a);
         need_free_nfd_cf_nfd_a = true;
     }
     defer if (need_free_nfd_cf_nfd_a) allocator.free(nfd_cf_nfd_a);
 
     // Process b
-    const nfd_b = try normalize.nfxdCodePoints(allocator, b, .nfd);
+    const nfd_b = try Normalize.nfxdCodePoints(allocator, b, .nfd);
     defer allocator.free(nfd_b);
 
     var need_free_cf_nfd_b = false;
@@ -234,7 +229,7 @@ pub fn canonCaselessMatch(
     var need_free_nfd_cf_nfd_b = false;
     var nfd_cf_nfd_b = cf_nfd_b;
     if (!need_free_cf_nfd_b) {
-        nfd_cf_nfd_b = try normalize.nfdCodePoints(allocator, cf_nfd_b);
+        nfd_cf_nfd_b = try Normalize.nfdCodePoints(allocator, cf_nfd_b);
         need_free_nfd_cf_nfd_b = true;
     }
     defer if (need_free_nfd_cf_nfd_b) allocator.free(nfd_cf_nfd_b);
@@ -245,17 +240,14 @@ pub fn canonCaselessMatch(
 test "canonCaselessMatch" {
     const allocator = testing.allocator;
 
-    var normalize = try Normalize.init(allocator);
-    defer normalize.deinit(allocator);
-
-    try testing.expect(try canonCaselessMatch(allocator, normalize, "ascii only!", "ASCII Only!"));
+    try testing.expect(try canonCaselessMatch(allocator, "ascii only!", "ASCII Only!"));
 
     const a = "Héllo World! \u{3d3}";
     const b = "He\u{301}llo World! \u{3a5}\u{301}";
-    try testing.expect(!try canonCaselessMatch(allocator, normalize, a, b));
+    try testing.expect(!try canonCaselessMatch(allocator, a, b));
 
     const c = "He\u{301}llo World! \u{3d2}\u{301}";
-    try testing.expect(try canonCaselessMatch(allocator, normalize, a, c));
+    try testing.expect(try canonCaselessMatch(allocator, a, c));
 }
 
 const std = @import("std");
diff --git a/src/Normalize.zig b/src/Normalize.zig
index 3191a8c..865318f 100644
--- a/src/Normalize.zig
+++ b/src/Normalize.zig
@@ -2,25 +2,8 @@
 //! Unicode Normalization. You can normalize strings into NFC,
 //! NFKC, NFD, and NFKD normalization forms.
 
-canon_data: CanonData = undefined,
-
 const Normalize = @This();
 
-pub fn init(allocator: Allocator) !Normalize {
-    var norm: Normalize = undefined;
-    try norm.setup(allocator);
-    return norm;
-}
-
-pub fn setup(self: *Normalize, allocator: Allocator) !void {
-    self.canon_data = try CanonData.init(allocator);
-}
-
-pub fn deinit(norm: *const Normalize, allocator: Allocator) void {
-    const mut_norm = @constCast(norm);
-    mut_norm.canon_data.deinit(allocator);
-}
-
 const SBase: u21 = 0xAC00;
 const LBase: u21 = 0x1100;
 const VBase: u21 = 0x1161;
@@ -91,12 +74,12 @@ const Decomp = struct {
 };
 
 // `mapping` retrieves the decomposition mapping for a code point as per the UCD.
-fn mapping(self: Normalize, cp: u21, form: Form) Decomp {
+fn mapping(cp: u21, form: Form) Decomp {
     var dc = Decomp{};
 
     switch (form) {
         .nfd => {
-            dc.cps = self.canon_data.toNfd(cp);
+            dc.cps = CanonData.toNfd(cp);
             if (dc.cps.len != 0) dc.form = .nfd;
         },
 
@@ -105,7 +88,7 @@ fn mapping(self: Normalize, cp: u21, form: Form) Decomp {
             if (dc.cps.len != 0) {
                 dc.form = .nfkd;
             } else {
-                dc.cps = self.canon_data.toNfd(cp);
+                dc.cps = CanonData.toNfd(cp);
                 if (dc.cps.len != 0) dc.form = .nfkd;
             }
         },
@@ -117,12 +100,7 @@ fn mapping(self: Normalize, cp: u21, form: Form) Decomp {
 }
 
 // `decompose` a code point to the specified normalization form, which should be either `.nfd` or `.nfkd`.
-fn decompose(
-    self: Normalize,
-    cp: u21,
-    form: Form,
-    buf: []u21,
-) Decomp {
+fn decompose(cp: u21, form: Form, buf: []u21) Decomp {
     // ASCII
     if (cp < 128) return .{};
 
@@ -149,7 +127,7 @@ fn decompose(
         // Look at previous code point in work queue.
         work_index -= 1;
         const next = work[work_index];
-        const m = self.mapping(next, form);
+        const m = Normalize.mapping(next, form);
 
         // No more of decompositions for this code point.
         if (m.form == .same) {
@@ -175,44 +153,41 @@ fn decompose(
 }
 
 test "decompose" {
-    const allocator = testing.allocator;
-    var n = try Normalize.init(allocator);
-    defer n.deinit(allocator);
     var buf: [18]u21 = undefined;
 
-    var dc = n.decompose('é', .nfd, &buf);
+    var dc = Normalize.decompose('é', .nfd, &buf);
     try testing.expect(dc.form == .nfd);
     try testing.expectEqualSlices(u21, &[_]u21{ 'e', '\u{301}' }, dc.cps[0..2]);
 
-    dc = n.decompose('\u{1e0a}', .nfd, &buf);
+    dc = Normalize.decompose('\u{1e0a}', .nfd, &buf);
     try testing.expect(dc.form == .nfd);
     try testing.expectEqualSlices(u21, &[_]u21{ 'D', '\u{307}' }, dc.cps[0..2]);
 
-    dc = n.decompose('\u{1e0a}', .nfkd, &buf);
+    dc = Normalize.decompose('\u{1e0a}', .nfkd, &buf);
     try testing.expect(dc.form == .nfkd);
     try testing.expectEqualSlices(u21, &[_]u21{ 'D', '\u{307}' }, dc.cps[0..2]);
 
-    dc = n.decompose('\u{3189}', .nfd, &buf);
+    dc = Normalize.decompose('\u{3189}', .nfd, &buf);
     try testing.expect(dc.form == .same);
     try testing.expect(dc.cps.len == 0);
 
-    dc = n.decompose('\u{3189}', .nfkd, &buf);
+    dc = Normalize.decompose('\u{3189}', .nfkd, &buf);
     try testing.expect(dc.form == .nfkd);
     try testing.expectEqualSlices(u21, &[_]u21{'\u{1188}'}, dc.cps[0..1]);
 
-    dc = n.decompose('\u{ace1}', .nfd, &buf);
+    dc = Normalize.decompose('\u{ace1}', .nfd, &buf);
     try testing.expect(dc.form == .nfd);
     try testing.expectEqualSlices(u21, &[_]u21{ '\u{1100}', '\u{1169}', '\u{11a8}' }, dc.cps[0..3]);
 
-    dc = n.decompose('\u{ace1}', .nfkd, &buf);
+    dc = Normalize.decompose('\u{ace1}', .nfkd, &buf);
     try testing.expect(dc.form == .nfd);
     try testing.expectEqualSlices(u21, &[_]u21{ '\u{1100}', '\u{1169}', '\u{11a8}' }, dc.cps[0..3]);
 
-    dc = n.decompose('\u{3d3}', .nfd, &buf);
+    dc = Normalize.decompose('\u{3d3}', .nfd, &buf);
     try testing.expect(dc.form == .nfd);
     try testing.expectEqualSlices(u21, &[_]u21{ '\u{3d2}', '\u{301}' }, dc.cps[0..2]);
 
-    dc = n.decompose('\u{3d3}', .nfkd, &buf);
+    dc = Normalize.decompose('\u{3d3}', .nfkd, &buf);
     try testing.expect(dc.form == .nfkd);
     try testing.expectEqualSlices(u21, &[_]u21{ '\u{3a5}', '\u{301}' }, dc.cps[0..2]);
 }
@@ -231,8 +206,8 @@ pub const Result = struct {
         return .{ .allocated = true, .slice = try allocator.dupe(u8, result.slice) };
     }
 
-    pub fn deinit(self: *const Result, allocator: Allocator) void {
-        if (self.allocated) allocator.free(self.slice);
+    pub fn deinit(result: *const Result, allocator: Allocator) void {
+        if (result.allocated) allocator.free(result.slice);
     }
 };
 
@@ -252,16 +227,16 @@ fn canonicalSort(cps: []u21) void {
 }
 
 /// Normalize `str` to NFD.
-pub fn nfd(self: Normalize, allocator: Allocator, str: []const u8) Allocator.Error!Result {
-    return self.nfxd(allocator, str, .nfd);
+pub fn nfd(allocator: Allocator, str: []const u8) Allocator.Error!Result {
+    return Normalize.nfxd(allocator, str, .nfd);
 }
 
 /// Normalize `str` to NFKD.
-pub fn nfkd(self: Normalize, allocator: Allocator, str: []const u8) Allocator.Error!Result {
-    return self.nfxd(allocator, str, .nfkd);
+pub fn nfkd(allocator: Allocator, str: []const u8) Allocator.Error!Result {
+    return Normalize.nfxd(allocator, str, .nfkd);
 }
 
-pub fn nfxdCodePoints(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allocator.Error![]u21 {
+pub fn nfxdCodePoints(allocator: Allocator, str: []const u8, form: Form) Allocator.Error![]u21 {
     var dcp_list = std.array_list.Managed(u21).init(allocator);
     defer dcp_list.deinit();
 
@@ -269,7 +244,7 @@ pub fn nfxdCodePoints(self: Normalize, allocator: Allocator, str: []const u8, fo
     var dc_buf: [18]u21 = undefined;
 
     while (cp_iter.next()) |cp| {
-        const dc = self.decompose(cp.code, form, &dc_buf);
+        const dc = Normalize.decompose(cp.code, form, &dc_buf);
         if (dc.form == .same) {
             try dcp_list.append(cp.code);
         } else {
@@ -282,11 +257,11 @@ pub fn nfxdCodePoints(self: Normalize, allocator: Allocator, str: []const u8, fo
     return try dcp_list.toOwnedSlice();
 }
 
-fn nfxd(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allocator.Error!Result {
+fn nfxd(allocator: Allocator, str: []const u8, form: Form) Allocator.Error!Result {
     // Quick checks.
     if (ascii.isAsciiOnly(str)) return Result{ .slice = str };
 
-    const dcps = try self.nfxdCodePoints(allocator, str, form);
+    const dcps = try Normalize.nfxdCodePoints(allocator, str, form);
     defer allocator.free(dcps);
 
     var dstr_list = std.array_list.Managed(u8).init(allocator);
@@ -303,10 +278,8 @@ fn nfxd(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allo
 
 test "nfd ASCII / no-alloc" {
     const allocator = testing.allocator;
-    var n = try Normalize.init(allocator);
-    defer n.deinit(allocator);
 
-    const result = try n.nfd(allocator, "Hello World!");
+    const result = try Normalize.nfd(allocator, "Hello World!");
     defer result.deinit(allocator);
 
     try testing.expectEqualStrings("Hello World!", result.slice);
@@ -314,10 +287,8 @@ test "nfd ASCII / no-alloc" {
 
 test "nfd !ASCII / alloc" {
     const allocator = testing.allocator;
-    var n = try Normalize.init(allocator);
-    defer n.deinit(allocator);
 
-    const result = try n.nfd(allocator, "Héllo World! \u{3d3}");
+    const result = try Normalize.nfd(allocator, "Héllo World! \u{3d3}");
     defer result.deinit(allocator);
 
     try testing.expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", result.slice);
@@ -325,10 +296,8 @@ test "nfd !ASCII / alloc" {
 
 test "nfkd ASCII / no-alloc" {
     const allocator = testing.allocator;
-    var n = try Normalize.init(allocator);
-    defer n.deinit(allocator);
 
-    const result = try n.nfkd(allocator, "Hello World!");
+    const result = try Normalize.nfkd(allocator, "Hello World!");
     defer result.deinit(allocator);
 
     try testing.expectEqualStrings("Hello World!", result.slice);
@@ -336,27 +305,21 @@ test "nfkd ASCII / no-alloc" {
 
 test "nfkd !ASCII / alloc" {
     const allocator = testing.allocator;
-    var n = try Normalize.init(allocator);
-    defer n.deinit(allocator);
 
-    const result = try n.nfkd(allocator, "Héllo World! \u{3d3}");
+    const result = try Normalize.nfkd(allocator, "Héllo World! \u{3d3}");
     defer result.deinit(allocator);
 
     try testing.expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", result.slice);
 }
 
-pub fn nfdCodePoints(
-    self: Normalize,
-    allocator: Allocator,
-    cps: []const u21,
-) Allocator.Error![]u21 {
+pub fn nfdCodePoints(allocator: Allocator, cps: []const u21) Allocator.Error![]u21 {
     var dcp_list = std.array_list.Managed(u21).init(allocator);
     defer dcp_list.deinit();
 
     var dc_buf: [18]u21 = undefined;
 
     for (cps) |cp| {
-        const dc = self.decompose(cp, .nfd, &dc_buf);
+        const dc = Normalize.decompose(cp, .nfd, &dc_buf);
 
         if (dc.form == .same) {
             try dcp_list.append(cp);
@@ -370,18 +333,14 @@ pub fn nfdCodePoints(
     return try dcp_list.toOwnedSlice();
 }
 
-pub fn nfkdCodePoints(
-    self: Normalize,
-    allocator: Allocator,
-    cps: []const u21,
-) Allocator.Error![]u21 {
+pub fn nfkdCodePoints(allocator: Allocator, cps: []const u21) Allocator.Error![]u21 {
     var dcp_list = std.array_list.Managed(u21).init(allocator);
     defer dcp_list.deinit();
 
     var dc_buf: [18]u21 = undefined;
 
     for (cps) |cp| {
-        const dc = self.decompose(cp, .nfkd, &dc_buf);
+        const dc = Normalize.decompose(cp, .nfkd, &dc_buf);
 
         if (dc.form == .same) {
             try dcp_list.append(cp);
@@ -402,29 +361,29 @@ fn isHangul(cp: u21) bool {
 }
 
 /// Normalizes `str` to NFC.
-pub fn nfc(self: Normalize, allocator: Allocator, str: []const u8) Allocator.Error!Result {
-    return self.nfxc(allocator, str, .nfc);
+pub fn nfc(allocator: Allocator, str: []const u8) Allocator.Error!Result {
+    return Normalize.nfxc(allocator, str, .nfc);
 }
 
 /// Normalizes `str` to NFKC.
-pub fn nfkc(self: Normalize, allocator: Allocator, str: []const u8) Allocator.Error!Result {
-    return self.nfxc(allocator, str, .nfkc);
+pub fn nfkc(allocator: Allocator, str: []const u8) Allocator.Error!Result {
+    return Normalize.nfxc(allocator, str, .nfkc);
 }
 
-fn nfxc(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allocator.Error!Result {
+fn nfxc(allocator: Allocator, str: []const u8, form: Form) Allocator.Error!Result {
     // Quick checks.
     if (ascii.isAsciiOnly(str)) return Result{ .slice = str };
     if (form == .nfc and isLatin1Only(str)) return Result{ .slice = str };
 
     // Decompose first.
     var dcps = if (form == .nfc)
-        try self.nfxdCodePoints(allocator, str, .nfd)
+        try Normalize.nfxdCodePoints(allocator, str, .nfd)
     else
-        try self.nfxdCodePoints(allocator, str, .nfkd);
+        try Normalize.nfxdCodePoints(allocator, str, .nfkd);
     defer allocator.free(dcps);
 
     // Compose
-    const tombstone = 0xe000; // Start of BMP Private Use Area
+    const tombstone = 0x1FFFF; // Convenient Cn noncharacter point
 
     // Loop over all decomposed code points.
     while (true) {
@@ -498,7 +457,7 @@ fn nfxc(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allo
                 if (!processed_hangul) {
                     // L, C are not Hangul, so check for primary composite
                     // in the Unicode Character Database.
-                    if (self.canon_data.toNfc(.{ L, C })) |P| {
+                    if (CanonData.toNfc(.{ L, C })) |P| {
                         // We have a primary composite P for L, C.
                         // We must check if P is not in the Full
                         // Composition Exclusions  (FCX) list,
@@ -534,10 +493,8 @@ fn nfxc(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allo
 
 test "nfc" {
     const allocator = testing.allocator;
-    var n = try Normalize.init(allocator);
-    defer n.deinit(allocator);
 
-    const result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}");
+    const result = try Normalize.nfc(allocator, "Complex char: \u{3D2}\u{301}");
     defer result.deinit(allocator);
 
     try testing.expectEqualStrings("Complex char: \u{3D3}", result.slice);
@@ -545,20 +502,18 @@ test "nfc" {
 
 test "nfkc" {
     const allocator = testing.allocator;
-    var n = try Normalize.init(allocator);
-    defer n.deinit(allocator);
 
-    const result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}");
+    const result = try Normalize.nfkc(allocator, "Complex char: \u{03A5}\u{0301}");
     defer result.deinit(allocator);
 
     try testing.expectEqualStrings("Complex char: \u{038E}", result.slice);
 }
 
 /// Tests for equality of `a` and `b` after normalizing to NFC.
-pub fn eql(self: Normalize, allocator: Allocator, a: []const u8, b: []const u8) !bool {
-    const norm_result_a = try self.nfc(allocator, a);
+pub fn eql(allocator: Allocator, a: []const u8, b: []const u8) !bool {
+    const norm_result_a = try Normalize.nfc(allocator, a);
     defer norm_result_a.deinit(allocator);
-    const norm_result_b = try self.nfc(allocator, b);
+    const norm_result_b = try Normalize.nfc(allocator, b);
     defer norm_result_b.deinit(allocator);
 
     return mem.eql(u8, norm_result_a.slice, norm_result_b.slice);
@@ -566,11 +521,9 @@ pub fn eql(self: Normalize, allocator: Allocator, a: []const u8, b: []const u8)
 
 test "eql" {
     const allocator = testing.allocator;
-    var n = try Normalize.init(allocator);
-    defer n.deinit(allocator);
 
-    try testing.expect(try n.eql(allocator, "foé", "foe\u{0301}"));
-    try testing.expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}"));
+    try testing.expect(try Normalize.eql(allocator, "foé", "foe\u{0301}"));
+    try testing.expect(try Normalize.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}"));
 }
 
 /// Returns true if `str` only contains Latin-1 Supplement
diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig
index 50b8824..81ea90d 100644
--- a/src/unicode_tests.zig
+++ b/src/unicode_tests.zig
@@ -5,9 +5,6 @@ test "Unicode normalization tests" {
     defer arena.deinit();
     const allocator = arena.allocator();
 
-    const n = try Normalize.init(allocator);
-    defer n.deinit(allocator);
-
     var reader = std.io.Reader.fixed(@embedFile("NormalizationTest.txt"));
     var cp_buf: [4]u8 = undefined;
 
@@ -47,7 +44,7 @@ test "Unicode normalization tests" {
                 }
 
                 const want = w_buf.items;
-                var got = try n.nfc(allocator, input);
+                var got = try Normalize.nfc(allocator, input);
                 defer got.deinit(allocator);
 
                 try testing.expectEqualStrings(want, got.slice);
@@ -64,7 +61,7 @@ test "Unicode normalization tests" {
                 }
 
                 const want = w_buf.items;
-                var got = try n.nfd(allocator, input);
+                var got = try Normalize.nfd(allocator, input);
                 defer got.deinit(allocator);
 
                 try testing.expectEqualStrings(want, got.slice);
@@ -81,7 +78,7 @@ test "Unicode normalization tests" {
                 }
 
                 const want = w_buf.items;
-                var got = try n.nfkc(allocator, input);
+                var got = try Normalize.nfkc(allocator, input);
                 defer got.deinit(allocator);
 
                 try testing.expectEqualStrings(want, got.slice);
@@ -98,7 +95,7 @@ test "Unicode normalization tests" {
                 }
 
                 const want = w_buf.items;
-                const got = try n.nfkd(allocator, input);
+                const got = try Normalize.nfkd(allocator, input);
                 defer got.deinit(allocator);
 
                 try testing.expectEqualStrings(want, got.slice);
-- 
cgit v1.2.3