From a2c4b7a57fe6b64bdd7c71305d408e5030af3157 Mon Sep 17 00:00:00 2001
From: Jose Colon Rodriguez
Date: Thu, 28 Mar 2024 22:19:50 -0400
Subject: Split out Unicode tests to separate file

---
 src/Normalize.zig | 121 +-----------------------------------------------------
 1 file changed, 1 insertion(+), 120 deletions(-)

(limited to 'src/Normalize.zig')

diff --git a/src/Normalize.zig b/src/Normalize.zig
index daf774d..f437f4f 100644
--- a/src/Normalize.zig
+++ b/src/Normalize.zig
@@ -3,12 +3,10 @@
 //! NFKC, NFD, and NFKD normalization forms.
 
 const std = @import("std");
-const assert = std.debug.assert;
 const debug = std.debug;
+const assert = debug.assert;
 const fmt = std.fmt;
-const fs = std.fs;
 const heap = std.heap;
-const io = std.io;
 const mem = std.mem;
 const simd = std.simd;
 const testing = std.testing;
@@ -615,123 +613,6 @@ test "isFcd" {
     try testing.expect(!n.isFcd(not_fcd));
 }
 
-test "Unicode normalization tests" {
-    var arena = heap.ArenaAllocator.init(testing.allocator);
-    defer arena.deinit();
-    var allocator = arena.allocator();
-
-    const data = try NormData.init(allocator);
-    defer data.deinit();
-    const n = Self{ .norm_data = &data };
-
-    var file = try fs.cwd().openFile("data/unicode/NormalizationTest.txt", .{});
-    defer file.close();
-    var buf_reader = io.bufferedReader(file.reader());
-    const input_stream = buf_reader.reader();
-
-    var line_no: usize = 0;
-    var buf: [4096]u8 = undefined;
-    var cp_buf: [4]u8 = undefined;
-
-    while (try input_stream.readUntilDelimiterOrEof(&buf, '\n')) |line| {
-        line_no += 1;
-        // Skip comments or empty lines.
-        if (line.len == 0 or line[0] == '#' or line[0] == '@') continue;
-        // Iterate over fields.
-        var fields = mem.split(u8, line, ";");
-        var field_index: usize = 0;
-        var input: []u8 = undefined;
-        defer allocator.free(input);
-
-        while (fields.next()) |field| : (field_index += 1) {
-            if (field_index == 0) {
-                var i_buf = std.ArrayList(u8).init(allocator);
-                defer i_buf.deinit();
-
-                var i_fields = mem.split(u8, field, " ");
-                while (i_fields.next()) |s| {
-                    const icp = try fmt.parseInt(u21, s, 16);
-                    const len = try unicode.utf8Encode(icp, &cp_buf);
-                    try i_buf.appendSlice(cp_buf[0..len]);
-                }
-
-                input = try i_buf.toOwnedSlice();
-            } else if (field_index == 1) {
-                //debug.print("\n*** {s} ***\n", .{line});
-                // NFC, time to test.
-                var w_buf = std.ArrayList(u8).init(allocator);
-                defer w_buf.deinit();
-
-                var w_fields = mem.split(u8, field, " ");
-                while (w_fields.next()) |s| {
-                    const wcp = try fmt.parseInt(u21, s, 16);
-                    const len = try unicode.utf8Encode(wcp, &cp_buf);
-                    try w_buf.appendSlice(cp_buf[0..len]);
-                }
-
-                const want = w_buf.items;
-                var got = try n.nfc(allocator, input);
-                defer got.deinit();
-
-                try testing.expectEqualStrings(want, got.slice);
-            } else if (field_index == 2) {
-                // NFD, time to test.
-                var w_buf = std.ArrayList(u8).init(allocator);
-                defer w_buf.deinit();
-
-                var w_fields = mem.split(u8, field, " ");
-                while (w_fields.next()) |s| {
-                    const wcp = try fmt.parseInt(u21, s, 16);
-                    const len = try unicode.utf8Encode(wcp, &cp_buf);
-                    try w_buf.appendSlice(cp_buf[0..len]);
-                }
-
-                const want = w_buf.items;
-                var got = try n.nfd(allocator, input);
-                defer got.deinit();
-
-                try testing.expectEqualStrings(want, got.slice);
-            } else if (field_index == 3) {
-                // NFKC, time to test.
-                var w_buf = std.ArrayList(u8).init(allocator);
-                defer w_buf.deinit();
-
-                var w_fields = mem.split(u8, field, " ");
-                while (w_fields.next()) |s| {
-                    const wcp = try fmt.parseInt(u21, s, 16);
-                    const len = try unicode.utf8Encode(wcp, &cp_buf);
-                    try w_buf.appendSlice(cp_buf[0..len]);
-                }
-
-                const want = w_buf.items;
-                var got = try n.nfkc(allocator, input);
-                defer got.deinit();
-
-                try testing.expectEqualStrings(want, got.slice);
-            } else if (field_index == 4) {
-                // NFKD, time to test.
-                var w_buf = std.ArrayList(u8).init(allocator);
-                defer w_buf.deinit();
-
-                var w_fields = mem.split(u8, field, " ");
-                while (w_fields.next()) |s| {
-                    const wcp = try fmt.parseInt(u21, s, 16);
-                    const len = try unicode.utf8Encode(wcp, &cp_buf);
-                    try w_buf.appendSlice(cp_buf[0..len]);
-                }
-
-                const want = w_buf.items;
-                const got = try n.nfkd(allocator, input);
-                defer got.deinit();
-
-                try testing.expectEqualStrings(want, got.slice);
-            } else {
-                continue;
-            }
-        }
-    }
-}
-
 /// Returns true if `str` only contains Latin-1 Supplement
 /// code points. Uses SIMD if possible.
 pub fn isLatin1Only(str: []const u8) bool {
-- 
cgit v1.2.3