From bcd79d29b316f636af9d21c8ace61e9ba93180d9 Mon Sep 17 00:00:00 2001
From: Jose Colon Rodriguez
Date: Sat, 23 Mar 2024 20:32:13 -0400
Subject: Rename CaseFold and Normalize

---
 src/Normalizer.zig | 774 -----------------------------------------------------
 1 file changed, 774 deletions(-)
 delete mode 100644 src/Normalizer.zig

(limited to 'src/Normalizer.zig')

diff --git a/src/Normalizer.zig b/src/Normalizer.zig
deleted file mode 100644
index b5a54d1..0000000
--- a/src/Normalizer.zig
+++ /dev/null
@@ -1,774 +0,0 @@
-//! Normalizer contains functions and methods that implement
-//! Unicode Normalization. You can normalize strings into NFC,
-//! NFKC, NFD, and NFKD normalization forms.
-
-const std = @import("std");
-const assert = std.debug.assert;
-const debug = std.debug;
-const fmt = std.fmt;
-const fs = std.fs;
-const heap = std.heap;
-const io = std.io;
-const mem = std.mem;
-const simd = std.simd;
-const testing = std.testing;
-const unicode = std.unicode;
-
-const ascii = @import("ascii");
-const CodePointIterator = @import("code_point").Iterator;
-pub const NormData = @import("NormData");
-
-norm_data: *const NormData,
-
-const Self = @This();
-
-// Constants for the Hangul syllable arithmetic performed by
-// `decomposeHangul`, `composeHangulCanon` and `composeHangulFull`.
-const SBase: u21 = 0xAC00; // Subtracted from a precomposed syllable to get its index.
-const LBase: u21 = 0x1100; // Base of the leading (L) jamo.
-const VBase: u21 = 0x1161; // Base of the vowel (V) jamo.
-const TBase: u21 = 0x11A7; // Base of the trailing (T) jamo; a T index of 0 means "no trailing jamo".
-const LCount: u21 = 19;
-const VCount: u21 = 21;
-const TCount: u21 = 28;
-const NCount: u21 = 588; // VCount * TCount
-const SCount: u21 = 11172; // LCount * NCount
-
-/// Algorithmically decomposes a precomposed Hangul syllable.
-/// Returns null unless the syllable type of `cp` is LV or LVT. Otherwise the
-/// two or three resulting jamo are written to the front of `buf` and returned
-/// as a `Decomp` tagged `.nfd`. `buf` must have room for three code points.
-fn decomposeHangul(self: Self, cp: u21, buf: []u21) ?Decomp {
-    switch (self.norm_data.hangul_data.syllable(cp)) {
-        .LV, .LVT => {},
-        else => return null,
-    }
-
-    const s_index: u21 = cp - SBase;
-    const t_index: u21 = s_index % TCount;
-
-    buf[0] = LBase + s_index / NCount;
-    buf[1] = VBase + (s_index % NCount) / TCount;
-
-    // A trailing jamo is present only when the T index is non-zero.
-    const len: usize = if (t_index == 0) 2 else 3;
-    if (len == 3) buf[2] = TBase + t_index;
-
-    return Decomp{ .form = .nfd, .cps = buf[0..len] };
-}
-
-/// Composes the syllable `lv` with the trailing jamo `t`.
-/// Asserts that `t` lies in 0x11A8...0x11C2.
-fn composeHangulCanon(lv: u21, t: u21) u21 {
-    assert(t >= 0x11A8 and t <= 0x11C2);
-    const t_index = t - TBase;
-    return lv + t_index;
-}
-
-/// Composes leading jamo `l`, vowel jamo `v` and an optional trailing jamo
-/// `t` into a precomposed syllable. Pass `t == 0` when there is no trailing
-/// jamo. Asserts that each jamo that is present lies in its expected range.
-fn composeHangulFull(l: u21, v: u21, t: u21) u21 {
-    assert(l >= 0x1100 and l <= 0x1112);
-    assert(v >= 0x1161 and v <= 0x1175);
-
-    const lv_syllable = SBase + (l - LBase) * NCount + (v - VBase) * TCount;
-    if (t == 0) return lv_syllable;
-
-    assert(t >= 0x11A8 and t <= 0x11C2);
-    return lv_syllable + (t - TBase);
-}
-
-/// Normalization form selector. `mapping` and `decompose` accept only `.nfd`
-/// and `.nfkd` (they panic otherwise); `.nfc` and `.nfkc` select the
-/// composing path in `nfxc`.
-const Form = enum {
-    nfc,
-    nfd,
-    nfkc,
-    nfkd,
-    /// Default tag of a `Decomp`: no decomposition was produced.
-    same,
-};
-
-/// Result of decomposing a single code point.
-const Decomp = struct {
-    /// `.same` when nothing was decomposed; otherwise the form of `cps`.
-    form: Form = .same,
-    /// Decomposed code points; empty by default.
-    cps: []const u21 = &.{},
-};
-
-/// `mapping` looks up the single-step decomposition mapping of `cp`.
-/// For `.nfkd` the compatibility mapping is tried first and the canonical
-/// mapping is the fallback. The result's form is `.same` when no mapping
-/// exists. Panics if `form` is neither `.nfd` nor `.nfkd`.
-pub fn mapping(self: Self, cp: u21, form: Form) Decomp {
-    const cps: []const u21 = switch (form) {
-        .nfd => self.norm_data.canon_data.toNfd(cp),
-
-        .nfkd => blk: {
-            const compat: []const u21 = self.norm_data.compat_data.toNfkd(cp);
-            if (compat.len != 0) break :blk compat;
-            break :blk self.norm_data.canon_data.toNfd(cp);
-        },
-
-        else => @panic("Normalizer.mapping only accepts form .nfd or .nfkd."),
-    };
-
-    return Decomp{
-        .form = if (cps.len != 0) form else .same,
-        .cps = cps,
-    };
-}
-
-/// `decompose` fully decomposes `cp` to `form`, which must be `.nfd` or `.nfkd`.
-/// The returned `Decomp` has form `.same` and no code points when `cp` is
-/// ASCII or passes the quick check for `form`. Otherwise `cps` is a slice of
-/// `buf` holding the full decomposition. Panics on any other `form` for
-/// non-ASCII input.
-pub fn decompose(
-    self: Self,
-    cp: u21,
-    form: Form,
-    buf: []u21,
-) Decomp {
-    // ASCII is returned unchanged.
-    if (cp < 128) return .{};
-
-    // NFD / NFKD quick checks.
-    const already_normalized = switch (form) {
-        .nfd => self.norm_data.normp_data.isNfd(cp),
-        .nfkd => self.norm_data.normp_data.isNfkd(cp),
-        else => @panic("Normalizer.decompose only accepts form .nfd or .nfkd."),
-    };
-    if (already_normalized) return .{};
-
-    // Precomposed Hangul syllables are decomposed arithmetically.
-    if (self.decomposeHangul(cp, buf)) |hangul| return hangul;
-
-    // Explicit stack of code points still to be expanded, seeded with `cp`.
-    var stack = [_]u21{cp} ++ [_]u21{0} ** 17;
-    var stack_len: usize = 1;
-    var out_len: usize = 0;
-
-    while (stack_len > 0) {
-        stack_len -= 1;
-        const top = stack[stack_len];
-        const m = self.mapping(top, form);
-
-        // Nothing further to expand: emit the code point.
-        if (m.form == .same) {
-            buf[out_len] = top;
-            out_len += 1;
-            continue;
-        }
-
-        // Push the mapping back to front so its first code point is popped first.
-        var remaining = m.cps.len;
-        while (remaining > 0) {
-            remaining -= 1;
-            stack[stack_len] = m.cps[remaining];
-            stack_len += 1;
-        }
-    }
-
-    return Decomp{ .form = form, .cps = buf[0..out_len] };
-}
-
-// Exercises `decompose` on single code points, reusing one scratch buffer.
-test "decompose" {
-    const allocator = testing.allocator;
-    var data = try NormData.init(allocator);
-    defer data.deinit();
-    var n = Self{ .norm_data = &data };
-
-    var buf: [18]u21 = undefined;
-
-    // Canonical decomposition of a Latin letter with an accent.
-    var dc = n.decompose('é', .nfd, &buf);
-    try testing.expect(dc.form == .nfd);
-    try testing.expectEqualSlices(u21, &[_]u21{ 'e', '\u{301}' }, dc.cps[0..2]);
-
-    dc = n.decompose('\u{1e0a}', .nfd, &buf);
-    try testing.expect(dc.form == .nfd);
-    try testing.expectEqualSlices(u21, &[_]u21{ 'D', '\u{307}' }, dc.cps[0..2]);
-
-    // Same code point under NFKD gives the same code points, tagged `.nfkd`.
-    dc = n.decompose('\u{1e0a}', .nfkd, &buf);
-    try testing.expect(dc.form == .nfkd);
-    try testing.expectEqualSlices(u21, &[_]u21{ 'D', '\u{307}' }, dc.cps[0..2]);
-
-    // U+3189 is expected unchanged under NFD and decomposed under NFKD.
-    dc = n.decompose('\u{3189}', .nfd, &buf);
-    try testing.expect(dc.form == .same);
-    try testing.expect(dc.cps.len == 0);
-
-    dc = n.decompose('\u{3189}', .nfkd, &buf);
-    try testing.expect(dc.form == .nfkd);
-    try testing.expectEqualSlices(u21, &[_]u21{'\u{1188}'}, dc.cps[0..1]);
-
-    // Hangul syllable: `decomposeHangul` always tags its result `.nfd`,
-    // even when `.nfkd` was requested.
-    dc = n.decompose('\u{ace1}', .nfd, &buf);
-    try testing.expect(dc.form == .nfd);
-    try testing.expectEqualSlices(u21, &[_]u21{ '\u{1100}', '\u{1169}', '\u{11a8}' }, dc.cps[0..3]);
-
-    dc = n.decompose('\u{ace1}', .nfkd, &buf);
-    try testing.expect(dc.form == .nfd);
-    try testing.expectEqualSlices(u21, &[_]u21{ '\u{1100}', '\u{1169}', '\u{11a8}' }, dc.cps[0..3]);
-
-    // U+03D3 is expected to yield different base letters under NFD and NFKD.
-    dc = n.decompose('\u{3d3}', .nfd, &buf);
-    try testing.expect(dc.form == .nfd);
-    try testing.expectEqualSlices(u21, &[_]u21{ '\u{3d2}', '\u{301}' }, dc.cps[0..2]);
-
-    dc = n.decompose('\u{3d3}', .nfkd, &buf);
-    try testing.expect(dc.form == .nfkd);
-    try testing.expectEqualSlices(u21, &[_]u21{ '\u{3a5}', '\u{301}' }, dc.cps[0..2]);
-}
-
-/// Normalized string returned by the normalization functions in this
-/// namespace. When `allocator` is set, `slice` is freed with it by `deinit`;
-/// when it is null, `deinit` does nothing. Always call `deinit`.
-pub const Result = struct {
-    allocator: ?mem.Allocator = null,
-    slice: []const u8,
-
-    /// Frees `slice` if this result carries an allocator.
-    pub fn deinit(self: *Result) void {
-        const allocator = self.allocator orelse return;
-        allocator.free(self.slice);
-    }
-};
-
-// Ordering predicate for `mem.sort`: true when the Canonical Combining
-// Class of `lhs` is strictly lower than that of `rhs`.
-fn cccLess(self: Self, lhs: u21, rhs: u21) bool {
-    const lhs_ccc = self.norm_data.ccc_data.ccc(lhs);
-    const rhs_ccc = self.norm_data.ccc_data.ccc(rhs);
-    return lhs_ccc < rhs_ccc;
-}
-
-// Applies the Canonical Sorting Algorithm in place: every maximal run of
-// code points with a non-zero combining class is sorted by combining class.
-fn canonicalSort(self: Self, cps: []u21) void {
-    var pos: usize = 0;
-    while (pos < cps.len) {
-        const run_start = pos;
-        while (pos < cps.len and self.norm_data.ccc_data.ccc(cps[pos]) != 0) pos += 1;
-        mem.sort(u21, cps[run_start..pos], self, cccLess);
-        // Step over the class-0 code point that ended the run.
-        pos += 1;
-    }
-}
-
-/// Normalize `str` to NFD.
-/// Delegates to `nfxd`: for ASCII-only input the returned `Result` refers to
-/// `str` itself and nothing is allocated; otherwise it owns memory obtained
-/// from `allocator`. Call `deinit` on the result in either case.
-pub fn nfd(self: Self, allocator: mem.Allocator, str: []const u8) !Result {
-    return self.nfxd(allocator, str, .nfd);
-}
-
-/// Normalize `str` to NFKD.
-/// Delegates to `nfxd`: for ASCII-only input the returned `Result` refers to
-/// `str` itself and nothing is allocated; otherwise it owns memory obtained
-/// from `allocator`. Call `deinit` on the result in either case.
-pub fn nfkd(self: Self, allocator: mem.Allocator, str: []const u8) !Result {
-    return self.nfxd(allocator, str, .nfkd);
-}
-
-/// Decomposes the UTF-8 string `str` to `form` (`.nfd` or `.nfkd`) and
-/// returns the canonically sorted code points. Caller owns the returned
-/// slice and must free it with `allocator`.
-pub fn nfxdCodePoints(self: Self, allocator: mem.Allocator, str: []const u8, form: Form) ![]u21 {
-    var out = std.ArrayList(u21).init(allocator);
-    defer out.deinit();
-
-    var scratch: [18]u21 = undefined;
-    var iter = CodePointIterator{ .bytes = str };
-
-    while (iter.next()) |cp| {
-        const dc = self.decompose(cp.code, form, &scratch);
-        if (dc.form != .same) {
-            try out.appendSlice(dc.cps);
-        } else {
-            // `.same` carries no code points; keep the original one.
-            try out.append(cp.code);
-        }
-    }
-
-    self.canonicalSort(out.items);
-
-    return try out.toOwnedSlice();
-}
-
-// Shared implementation of `nfd` and `nfkd`. ASCII-only input is returned
-// as-is without allocating; anything else is decomposed to code points and
-// re-encoded as UTF-8 into memory owned by the returned `Result`.
-fn nfxd(self: Self, allocator: mem.Allocator, str: []const u8, form: Form) !Result {
-    // Quick check.
-    if (ascii.isAsciiOnly(str)) return Result{ .slice = str };
-
-    const code_points = try self.nfxdCodePoints(allocator, str, form);
-    defer allocator.free(code_points);
-
-    var encoded = std.ArrayList(u8).init(allocator);
-    defer encoded.deinit();
-
-    var utf8: [4]u8 = undefined;
-    for (code_points) |code_point| {
-        const n = try unicode.utf8Encode(code_point, &utf8);
-        try encoded.appendSlice(utf8[0..n]);
-    }
-
-    const owned = try encoded.toOwnedSlice();
-    return Result{ .allocator = allocator, .slice = owned };
-}
-
-// ASCII-only input must come back unchanged from `nfd`.
-test "nfd ASCII / no-alloc" {
-    const gpa = testing.allocator;
-    var tables = try NormData.init(gpa);
-    defer tables.deinit();
-    const normalizer = Self{ .norm_data = &tables };
-
-    var normalized = try normalizer.nfd(gpa, "Hello World!");
-    defer normalized.deinit();
-
-    try testing.expectEqualStrings("Hello World!", normalized.slice);
-}
-
-// Non-ASCII input is canonically decomposed by `nfd`.
-test "nfd !ASCII / alloc" {
-    const gpa = testing.allocator;
-    var tables = try NormData.init(gpa);
-    defer tables.deinit();
-    const normalizer = Self{ .norm_data = &tables };
-
-    var normalized = try normalizer.nfd(gpa, "Héllo World! \u{3d3}");
-    defer normalized.deinit();
-
-    try testing.expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", normalized.slice);
-}
-
-// ASCII-only input must come back unchanged from `nfkd`.
-test "nfkd ASCII / no-alloc" {
-    const gpa = testing.allocator;
-    var tables = try NormData.init(gpa);
-    defer tables.deinit();
-    const normalizer = Self{ .norm_data = &tables };
-
-    var normalized = try normalizer.nfkd(gpa, "Hello World!");
-    defer normalized.deinit();
-
-    try testing.expectEqualStrings("Hello World!", normalized.slice);
-}
-
-// Non-ASCII input is compatibility-decomposed by `nfkd`.
-test "nfkd !ASCII / alloc" {
-    const gpa = testing.allocator;
-    var tables = try NormData.init(gpa);
-    defer tables.deinit();
-    const normalizer = Self{ .norm_data = &tables };
-
-    var normalized = try normalizer.nfkd(gpa, "Héllo World! \u{3d3}");
-    defer normalized.deinit();
-
-    try testing.expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", normalized.slice);
-}
-
-/// Decomposes the code points `cps` to NFD and returns them canonically
-/// sorted. Caller owns the returned slice and must free it with `allocator`.
-pub fn nfdCodePoints(
-    self: Self,
-    allocator: mem.Allocator,
-    cps: []const u21,
-) ![]u21 {
-    return self.decomposeCodePoints(allocator, cps, .nfd);
-}
-
-/// Decomposes the code points `cps` to NFKD and returns them canonically
-/// sorted. Caller owns the returned slice and must free it with `allocator`.
-pub fn nfkdCodePoints(
-    self: Self,
-    allocator: mem.Allocator,
-    cps: []const u21,
-) ![]u21 {
-    return self.decomposeCodePoints(allocator, cps, .nfkd);
-}
-
-// Shared implementation of `nfdCodePoints` and `nfkdCodePoints`: decomposes
-// each code point to `form` (`.nfd` or `.nfkd`), then canonically sorts.
-fn decomposeCodePoints(
-    self: Self,
-    allocator: mem.Allocator,
-    cps: []const u21,
-    form: Form,
-) ![]u21 {
-    var dcp_list = std.ArrayList(u21).init(allocator);
-    defer dcp_list.deinit();
-
-    var dc_buf: [18]u21 = undefined;
-
-    for (cps) |cp| {
-        const dc = self.decompose(cp, form, &dc_buf);
-
-        if (dc.form == .same) {
-            // `.same` carries no code points; keep the original one.
-            try dcp_list.append(cp);
-        } else {
-            try dcp_list.appendSlice(dc.cps);
-        }
-    }
-
-    self.canonicalSort(dcp_list.items);
-
-    return try dcp_list.toOwnedSlice();
-}
-
-// Composition (NFC, NFKC)
-
-// True when `cp` is at or above 0x1100 and the Hangul data reports a
-// syllable type other than `.none` for it.
-fn isHangul(self: Self, cp: u21) bool {
-    if (cp < 0x1100) return false;
-    return self.norm_data.hangul_data.syllable(cp) != .none;
-}
-
-/// Normalizes `str` to NFC.
-/// Delegates to `nfxc`: ASCII-only or Latin-1-only input is returned as-is
-/// (the `Result` refers to `str`, nothing is allocated); otherwise the
-/// result owns memory from `allocator`. Call `deinit` on the result.
-pub fn nfc(self: Self, allocator: mem.Allocator, str: []const u8) !Result {
-    return self.nfxc(allocator, str, .nfc);
-}
-
-/// Normalizes `str` to NFKC.
-/// Delegates to `nfxc`: ASCII-only input is returned as-is (the `Result`
-/// refers to `str`, nothing is allocated); otherwise the result owns memory
-/// from `allocator`. Call `deinit` on the result.
-pub fn nfkc(self: Self, allocator: mem.Allocator, str: []const u8) !Result {
-    return self.nfxc(allocator, str, .nfkc);
-}
-
-// Shared implementation of `nfc` and `nfkc`. Decomposes `str` (to NFD for
-// `.nfc`, to NFKD otherwise) and then repeatedly composes starter/combining
-// pairs in place until a pass makes no change. Composed-away code points are
-// overwritten with a tombstone and skipped when the result is encoded.
-// ASCII-only input (and, for `.nfc`, Latin-1-only input) is returned as-is
-// without allocating.
-fn nfxc(self: Self, allocator: mem.Allocator, str: []const u8, form: Form) !Result {
-    // Quick checks.
-    if (ascii.isAsciiOnly(str)) return Result{ .slice = str };
-    if (form == .nfc and isLatin1Only(str)) return Result{ .slice = str };
-
-    // Decompose first.
-    var dcps = if (form == .nfc)
-        try self.nfxdCodePoints(allocator, str, .nfd)
-    else
-        try self.nfxdCodePoints(allocator, str, .nfkd);
-    defer allocator.free(dcps);
-
-    // Compose
-    // The tombstone must never collide with real input. U+E000 is a valid
-    // private-use code point and would be silently dropped, so use a value
-    // above the Unicode range (U+10FFFF) instead.
-    const tombstone: u21 = std.math.maxInt(u21);
-
-    // Loop over all decomposed code points.
-    while (true) {
-        var i: usize = 1; // start at second code point.
-        var deleted: usize = 0;
-
-        // For each code point, C, find the preceding
-        // starter code point L, if any.
-        block_check: while (i < dcps.len) : (i += 1) {
-            const C = dcps[i];
-            if (C == tombstone) continue :block_check;
-            const cc_C = self.norm_data.ccc_data.ccc(C);
-            var starter_index: ?usize = null;
-            var j: usize = i;
-
-            // Seek back to find starter L, if any. The `j > 0` bound keeps
-            // the decrement from underflowing even when index 0 is skipped.
-            while (j > 0) {
-                j -= 1;
-                if (dcps[j] == tombstone) continue;
-
-                // Check for starter.
-                if (self.norm_data.ccc_data.isStarter(dcps[j])) {
-                    // Check for blocking conditions.
-                    for (dcps[(j + 1)..i]) |B| {
-                        if (B == tombstone) continue;
-                        const cc_B = self.norm_data.ccc_data.ccc(B);
-                        if (cc_B != 0 and self.isHangul(C)) continue :block_check;
-                        if (cc_B >= cc_C) continue :block_check;
-                    }
-
-                    // Found starter at j.
-                    starter_index = j;
-                    break;
-                }
-            }
-
-            // If we have a starter L, see if there's a primary
-            // composite, P, for the sequence L, C. If so, we must
-            // replace L with P and delete C.
-            if (starter_index) |sidx| {
-                const L = dcps[sidx];
-                var processed_hangul = false;
-
-                // If L and C are Hangul syllables, we can compose
-                // them algorithmically if possible.
-                if (self.isHangul(L) and self.isHangul(C)) {
-                    // Get Hangul syllable types.
-                    const l_stype = self.norm_data.hangul_data.syllable(L);
-                    const c_stype = self.norm_data.hangul_data.syllable(C);
-
-                    if (l_stype == .LV and c_stype == .T) {
-                        // LV, T canonical composition.
-                        dcps[sidx] = composeHangulCanon(L, C);
-                        dcps[i] = tombstone; // Mark for deletion.
-                        processed_hangul = true;
-                    }
-
-                    if (l_stype == .L and c_stype == .V) {
-                        // L, V full composition. L, V, T is handled via main loop.
-                        dcps[sidx] = composeHangulFull(L, C, 0);
-                        dcps[i] = tombstone; // Mark for deletion.
-                        processed_hangul = true;
-                    }
-
-                    if (processed_hangul) deleted += 1;
-                }
-
-                // If no composition has occurred yet.
-                if (!processed_hangul) {
-                    // L, C are not Hangul, so check for primary composite
-                    // in the Unicode Character Database.
-                    if (self.norm_data.canon_data.toNfc(.{ L, C })) |P| {
-                        // We have a primary composite P for L, C.
-                        // We must check if P is not in the Full
-                        // Composition Exclusions (FCX) list,
-                        // preventing it from appearing in any
-                        // composed form (NFC, NFKC).
-                        if (!self.norm_data.normp_data.isFcx(P)) {
-                            dcps[sidx] = P;
-                            dcps[i] = tombstone; // Mark for deletion.
-                            deleted += 1;
-                        }
-                    }
-                }
-            }
-        }
-
-        // If we have no deletions, the code point sequence
-        // has been fully composed.
-        if (deleted == 0) {
-            var cstr_list = std.ArrayList(u8).init(allocator);
-            defer cstr_list.deinit();
-            var buf: [4]u8 = undefined;
-
-            for (dcps) |cp| {
-                if (cp == tombstone) continue; // "Delete"
-                const len = try unicode.utf8Encode(cp, &buf);
-                try cstr_list.appendSlice(buf[0..len]);
-            }
-
-            return Result{ .allocator = allocator, .slice = try cstr_list.toOwnedSlice() };
-        }
-    }
-}
-
-// A decomposed pair is recomposed by `nfc`.
-test "nfc" {
-    const gpa = testing.allocator;
-    var tables = try NormData.init(gpa);
-    defer tables.deinit();
-    const normalizer = Self{ .norm_data = &tables };
-
-    var composed = try normalizer.nfc(gpa, "Complex char: \u{3D2}\u{301}");
-    defer composed.deinit();
-
-    try testing.expectEqualStrings("Complex char: \u{3D3}", composed.slice);
-}
-
-// A decomposed pair is recomposed by `nfkc`.
-test "nfkc" {
-    const gpa = testing.allocator;
-    var tables = try NormData.init(gpa);
-    defer tables.deinit();
-    const normalizer = Self{ .norm_data = &tables };
-
-    var composed = try normalizer.nfkc(gpa, "Complex char: \u{03A5}\u{0301}");
-    defer composed.deinit();
-
-    try testing.expectEqualStrings("Complex char: \u{038E}", composed.slice);
-}
-
-/// Reports whether `a` and `b` are byte-for-byte equal once both have been
-/// normalized to NFC. Temporary results are allocated with `allocator` and
-/// released before returning.
-pub fn eql(self: Self, allocator: mem.Allocator, a: []const u8, b: []const u8) !bool {
-    var lhs = try self.nfc(allocator, a);
-    defer lhs.deinit();
-
-    var rhs = try self.nfc(allocator, b);
-    defer rhs.deinit();
-
-    return mem.eql(u8, lhs.slice, rhs.slice);
-}
-
-// Precomposed and decomposed spellings compare equal.
-test "eql" {
-    const gpa = testing.allocator;
-    var tables = try NormData.init(gpa);
-    defer tables.deinit();
-    const normalizer = Self{ .norm_data = &tables };
-
-    try testing.expect(try normalizer.eql(gpa, "foé", "foe\u{0301}"));
-    try testing.expect(try normalizer.eql(gpa, "foϓ", "fo\u{03D2}\u{0301}"));
-}
-
-// FCD
-// Combining class of the first code point of `cp`'s canonical mapping,
-// or of `cp` itself when it has no mapping.
-fn getLeadCcc(self: Self, cp: u21) u8 {
-    const dc = self.mapping(cp, .nfd);
-    const lead = switch (dc.form) {
-        .same => cp,
-        else => dc.cps[0],
-    };
-    return self.norm_data.ccc_data.ccc(lead);
-}
-
-// Combining class of the last code point of `cp`'s canonical mapping,
-// or of `cp` itself when it has no mapping.
-fn getTrailCcc(self: Self, cp: u21) u8 {
-    const dc = self.mapping(cp, .nfd);
-    const trail = switch (dc.form) {
-        .same => cp,
-        else => dc.cps[dc.cps.len - 1],
-    };
-    return self.norm_data.ccc_data.ccc(trail);
-}
-
-/// Fast check to detect if a string is already in NFC or NFD form.
-/// Returns false as soon as a code point's non-zero leading combining class
-/// is lower than the trailing combining class of the code point before it.
-pub fn isFcd(self: Self, str: []const u8) bool {
-    var iter = CodePointIterator{ .bytes = str };
-    var prev_trail: u8 = 0;
-
-    while (iter.next()) |cp| {
-        const lead = self.getLeadCcc(cp.code);
-        if (lead != 0 and lead < prev_trail) return false;
-        prev_trail = self.getTrailCcc(cp.code);
-    }
-
-    return true;
-}
-
-// Composed and decomposed strings pass; out-of-order marks fail.
-test "isFcd" {
-    const gpa = testing.allocator;
-    var tables = try NormData.init(gpa);
-    defer tables.deinit();
-    const normalizer = Self{ .norm_data = &tables };
-
-    const composed = "José \u{3D3}";
-    const decomposed = "Jose\u{301} \u{3d2}\u{301}";
-    const misordered = "Jose\u{301} \u{3d2}\u{315}\u{301}";
-
-    try testing.expect(normalizer.isFcd(composed));
-    try testing.expect(normalizer.isFcd(decomposed));
-    try testing.expect(!normalizer.isFcd(misordered));
-}
-
-// Conformance test driven by data/unicode/NormalizationTest.txt. Each data
-// line holds `;`-separated fields of space-separated hex code points:
-// field 0 is the input, fields 1-4 are the expected NFC, NFD, NFKC and NFKD
-// results. Any further fields are ignored.
-test "Unicode normalization tests" {
-    var arena = heap.ArenaAllocator.init(testing.allocator);
-    defer arena.deinit();
-    const allocator = arena.allocator();
-
-    var data = try NormData.init(allocator);
-    defer data.deinit();
-    const n = Self{ .norm_data = &data };
-
-    var file = try fs.cwd().openFile("data/unicode/NormalizationTest.txt", .{});
-    defer file.close();
-    var buf_reader = io.bufferedReader(file.reader());
-    const input_stream = buf_reader.reader();
-
-    var line_no: usize = 0;
-    var buf: [4096]u8 = undefined;
-    var cp_buf: [4]u8 = undefined;
-
-    while (try input_stream.readUntilDelimiterOrEof(&buf, '\n')) |line| {
-        line_no += 1;
-        // Skip comments or empty lines.
-        if (line.len == 0 or line[0] == '#' or line[0] == '@') continue;
-        // Iterate over fields.
-        var fields = mem.split(u8, line, ";");
-        var field_index: usize = 0;
-        // Start empty so the deferred free is a no-op if parsing the first
-        // field fails before `input` is assigned.
-        var input: []u8 = &[_]u8{};
-        defer allocator.free(input);
-
-        while (fields.next()) |field| : (field_index += 1) {
-            // Only the input and the four expected forms are of interest.
-            if (field_index > 4) continue;
-
-            // Encode the field's hex code points as UTF-8.
-            var bytes = std.ArrayList(u8).init(allocator);
-            defer bytes.deinit();
-
-            var hex_fields = mem.split(u8, field, " ");
-            while (hex_fields.next()) |s| {
-                const cp = try fmt.parseInt(u21, s, 16);
-                const len = try unicode.utf8Encode(cp, &cp_buf);
-                try bytes.appendSlice(cp_buf[0..len]);
-            }
-
-            if (field_index == 0) {
-                input = try bytes.toOwnedSlice();
-                continue;
-            }
-
-            //debug.print("\n*** {s} ***\n", .{line});
-            var got = switch (field_index) {
-                1 => try n.nfc(allocator, input),
-                2 => try n.nfd(allocator, input),
-                3 => try n.nfkc(allocator, input),
-                4 => try n.nfkd(allocator, input),
-                else => unreachable,
-            };
-            defer got.deinit();
-
-            try testing.expectEqualStrings(bytes.items, got.slice);
-        }
-    }
-}
-
-/// Returns true if every code point in `str` is at most U+00FF, i.e. lies in
-/// the Basic Latin or Latin-1 Supplement blocks. An empty string yields true.
-/// Uses SIMD if possible.
-pub fn isLatin1Only(str: []const u8) bool {
-    // Last Latin-1 code point. U+0100 already belongs to Latin Extended-A.
-    const max_latin1: u21 = 0xff;
-
-    var cp_iter = CodePointIterator{ .bytes = str };
-
-    // No suggested vector length for this target: check one at a time.
-    const vec_len = simd.suggestVectorLength(u21) orelse return blk: {
-        break :blk while (cp_iter.next()) |cp| {
-            if (cp.code > max_latin1) break false;
-        } else true;
-    };
-
-    const Vec = @Vector(vec_len, u21);
-
-    outer: while (true) {
-        var v1: Vec = undefined;
-        const saved_cp_i = cp_iter.i;
-
-        for (0..vec_len) |i| {
-            if (cp_iter.next()) |cp| {
-                v1[i] = cp.code;
-            } else {
-                // Not enough code points left to fill a vector: rewind and
-                // let the scalar loop below handle the tail.
-                cp_iter.i = saved_cp_i;
-                break :outer;
-            }
-        }
-        const v2: Vec = @splat(max_latin1);
-        if (@reduce(.Or, v1 > v2)) return false;
-    }
-
-    return while (cp_iter.next()) |cp| {
-        if (cp.code > max_latin1) break false;
-    } else true;
-}
-
-// Strings up to U+00FF pass; a string containing U+03D3 fails.
-test "isLatin1Only" {
-    const within_latin1 = "Hello, World! \u{fe} \u{ff}";
-    const beyond_latin1 = "Héllo, World! \u{3d3}";
-
-    try testing.expect(isLatin1Only(within_latin1));
-    try testing.expect(!isLatin1Only(beyond_latin1));
-}
-- 
cgit v1.2.3