From a3b5e884b12fdaa341010ef41bb9382fa0cd89f8 Mon Sep 17 00:00:00 2001 From: Michael Chaten Date: Sat, 13 Sep 2025 08:38:24 -0700 Subject: Update codebase to Zig 0.15.1. Removes compression support --- src/unicode_tests.zig | 88 +++++++++++++++++++++++++++------------------------ 1 file changed, 47 insertions(+), 41 deletions(-) (limited to 'src/unicode_tests.zig') diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig index ae177a9..ff49b2a 100644 --- a/src/unicode_tests.zig +++ b/src/unicode_tests.zig @@ -3,35 +3,34 @@ const dbg_print = false; test "Unicode normalization tests" { var arena = heap.ArenaAllocator.init(testing.allocator); defer arena.deinit(); - var allocator = arena.allocator(); + const allocator = arena.allocator(); const n = try Normalize.init(allocator); defer n.deinit(allocator); var file = try fs.cwd().openFile("data/unicode/NormalizationTest.txt", .{}); defer file.close(); - var buf_reader = io.bufferedReader(file.reader()); - var input_stream = buf_reader.reader(); - var buf: [4096]u8 = undefined; + var reader = file.reader(&buf); + var cp_buf: [4]u8 = undefined; - var line_iter: IterRead = .{ .read = &input_stream }; + var line_iter: IterRead = .{ .read = &reader.interface }; - while (try line_iter.next(&buf)) |line| { + while (line_iter.next()) |line| { // Iterate over fields. var fields = mem.splitScalar(u8, line, ';'); var field_index: usize = 0; var input: []u8 = undefined; - defer allocator.free(input); - + if (dbg_print) std.debug.print("Line: {s}\n", .{line}); while (fields.next()) |field| : (field_index += 1) { if (field_index == 0) { - var i_buf = std.ArrayList(u8).init(allocator); + var i_buf = std.array_list.Managed(u8).init(allocator); defer i_buf.deinit(); var i_fields = mem.splitScalar(u8, field, ' '); while (i_fields.next()) |s| { + if (dbg_print) std.debug.print("Debug: {s}\n", .{s}); const icp = try fmt.parseInt(u21, s, 16); const len = try unicode.utf8Encode(icp, &cp_buf); try i_buf.appendSlice(cp_buf[0..len]); @@ -41,7 +40,7 @@ test "Unicode normalization tests" { } else if (field_index == 1) { if (dbg_print) debug.print("\n*** {s} ***\n", .{line}); // NFC, time to test. - var w_buf = std.ArrayList(u8).init(allocator); + var w_buf = std.array_list.Managed(u8).init(allocator); defer w_buf.deinit(); var w_fields = mem.splitScalar(u8, field, ' '); @@ -58,7 +57,7 @@ test "Unicode normalization tests" { try testing.expectEqualStrings(want, got.slice); } else if (field_index == 2) { // NFD, time to test. - var w_buf = std.ArrayList(u8).init(allocator); + var w_buf = std.array_list.Managed(u8).init(allocator); defer w_buf.deinit(); var w_fields = mem.splitScalar(u8, field, ' '); @@ -75,7 +74,7 @@ test "Unicode normalization tests" { try testing.expectEqualStrings(want, got.slice); } else if (field_index == 3) { // NFKC, time to test. - var w_buf = std.ArrayList(u8).init(allocator); + var w_buf = std.array_list.Managed(u8).init(allocator); defer w_buf.deinit(); var w_fields = mem.splitScalar(u8, field, ' '); @@ -92,7 +91,7 @@ test "Unicode normalization tests" { try testing.expectEqualStrings(want, got.slice); } else if (field_index == 4) { // NFKD, time to test. - var w_buf = std.ArrayList(u8).init(allocator); + var w_buf = std.array_list.Managed(u8).init(allocator); defer w_buf.deinit(); var w_fields = mem.splitScalar(u8, field, ' '); @@ -111,6 +110,11 @@ test "Unicode normalization tests" { continue; } } + } else |err| switch (err) { + error.EndOfStream => {}, + else => { + return err; + }, } } @@ -118,26 +122,25 @@ test "Segmentation GraphemeIterator" { const allocator = std.testing.allocator; var file = try std.fs.cwd().openFile("data/unicode/auxiliary/GraphemeBreakTest.txt", .{}); defer file.close(); - var buf_reader = std.io.bufferedReader(file.reader()); - var input_stream = buf_reader.reader(); + var buf: [4096]u8 = undefined; + var reader = file.reader(&buf); const graph = try Graphemes.init(allocator); defer graph.deinit(allocator); - var buf: [4096]u8 = undefined; - var line_iter: IterRead = .{ .read = &input_stream }; + var line_iter: IterRead = .{ .read = &reader.interface }; - while (try line_iter.next(&buf)) |raw| { + while (line_iter.next()) |raw| { // Clean up. var line = std.mem.trimLeft(u8, raw, "÷ "); if (std.mem.indexOf(u8, line, " ÷\t")) |final| { line = line[0..final]; } // Iterate over fields. - var want = std.ArrayList(Grapheme).init(allocator); + var want = std.array_list.Managed(Grapheme).init(allocator); defer want.deinit(); - var all_bytes = std.ArrayList(u8).init(allocator); + var all_bytes = std.array_list.Managed(u8).init(allocator); defer all_bytes.deinit(); var graphemes = std.mem.splitSequence(u8, line, " ÷ "); @@ -250,6 +253,11 @@ test "Segmentation GraphemeIterator" { } } } + } else |err| switch (err) { + error.EndOfStream => {}, + else => { + return err; + }, } } @@ -257,26 +265,25 @@ test "Segmentation Word Iterator" { const allocator = std.testing.allocator; var file = try std.fs.cwd().openFile("data/unicode/auxiliary/WordBreakTest.txt", .{}); defer file.close(); - var buf_reader = std.io.bufferedReader(file.reader()); - var input_stream = buf_reader.reader(); + var buf: [4096]u8 = undefined; + var reader = file.reader(&buf); const wb = try Words.init(allocator); defer wb.deinit(allocator); - var buf: [4096]u8 = undefined; - var line_iter: IterRead = .{ .read = &input_stream }; + var line_iter: IterRead = .{ .read = &reader.interface }; - while (try line_iter.next(&buf)) |raw| { + while (line_iter.next()) |raw| { // Clean up. var line = std.mem.trimLeft(u8, raw, "÷ "); if (std.mem.indexOf(u8, line, " ÷\t")) |final| { line = line[0..final]; } // Iterate over fields. - var want = std.ArrayList(Word).init(allocator); + var want = std.array_list.Managed(Word).init(allocator); defer want.deinit(); - var all_bytes = std.ArrayList(u8).init(allocator); + var all_bytes = std.array_list.Managed(u8).init(allocator); defer all_bytes.deinit(); var words = std.mem.splitSequence(u8, line, " ÷ "); @@ -439,26 +446,26 @@ test "Segmentation Word Iterator" { if (idx == 0) break; } } + } else |err| switch (err) { + error.EndOfStream => {}, + else => { + return err; + }, } } const IterRead = struct { - read: *Reader, + read: *io.Reader, line: usize = 0, - pub fn next(iter: *IterRead, buf: []u8) !?[]const u8 { - defer iter.line += 1; - const maybe_line = try iter.read.readUntilDelimiterOrEof(buf, '#'); - if (maybe_line) |this_line| { - try iter.read.skipUntilDelimiterOrEof('\n'); - if (this_line.len == 0 or this_line[0] == '@') { - // comment, next line - return iter.next(buf); - } else { - return this_line; - } + pub fn next(iter: *IterRead) anyerror![]const u8 { + iter.line += 1; + const this_line = try iter.read.takeDelimiterExclusive('\n'); + if (this_line.len == 0 or this_line[0] == '@' or this_line[0] == '#') { + // comment, next line + return iter.next(); } else { - return null; + return this_line; } } }; @@ -467,7 +474,6 @@ const std = @import("std"); const fmt = std.fmt; const fs = std.fs; const io = std.io; -const Reader = io.BufferedReader(4096, fs.File.Reader).Reader; const heap = std.heap; const mem = std.mem; const debug = std.debug; -- cgit v1.2.3