From a3b5e884b12fdaa341010ef41bb9382fa0cd89f8 Mon Sep 17 00:00:00 2001 From: Michael Chaten Date: Sat, 13 Sep 2025 08:38:24 -0700 Subject: Update codebase to Zig 0.15.1. Removes compression support --- src/CanonData.zig | 4 +-- src/CaseFolding.zig | 6 ++-- src/CombiningData.zig | 5 +-- src/CompatData.zig | 5 +-- src/DisplayWidth.zig | 8 ++--- src/GeneralCategories.zig | 4 +-- src/Graphemes.zig | 4 +-- src/HangulData.zig | 5 +-- src/LetterCasing.zig | 14 +++----- src/NormPropsData.zig | 5 +-- src/Normalize.zig | 10 +++--- src/Properties.zig | 10 ++---- src/Scripts.zig | 5 +-- src/Words.zig | 4 +-- src/unicode_tests.zig | 88 +++++++++++++++++++++++++---------------------- 15 files changed, 73 insertions(+), 104 deletions(-) (limited to 'src') diff --git a/src/CanonData.zig b/src/CanonData.zig index 5d2332a..cf9dc8a 100644 --- a/src/CanonData.zig +++ b/src/CanonData.zig @@ -7,11 +7,9 @@ cps: []u21 = undefined, const CanonData = @This(); pub fn init(allocator: mem.Allocator) !CanonData { - const decompressor = compress.flate.inflate.decompressor; const in_bytes = @embedFile("canon"); var in_fbs = std.io.fixedBufferStream(in_bytes); - var in_decomp = decompressor(.raw, in_fbs.reader()); - var reader = in_decomp.reader(); + var reader = in_fbs.reader(); const endian = builtin.cpu.arch.endian(); var cdata = CanonData{ diff --git a/src/CaseFolding.zig b/src/CaseFolding.zig index ff41b3e..df86b92 100644 --- a/src/CaseFolding.zig +++ b/src/CaseFolding.zig @@ -48,11 +48,9 @@ fn setupImpl(casefold: *CaseFolding, allocator: Allocator) Allocator.Error!void } inline fn setupImplInner(casefold: *CaseFolding, allocator: Allocator) !void { - const decompressor = compress.flate.inflate.decompressor; const in_bytes = @embedFile("fold"); var in_fbs = std.io.fixedBufferStream(in_bytes); - var in_decomp = decompressor(.raw, in_fbs.reader()); - var reader = in_decomp.reader(); + var reader = in_fbs.reader(); const endian = builtin.cpu.arch.endian(); @@ -123,7 +121,7 @@ pub fn caseFoldAlloc( allocator: Allocator, cps: []const u21, ) Allocator.Error![]const u21 { - var cfcps = std.ArrayList(u21).init(allocator); + var cfcps = std.array_list.Managed(u21).init(allocator); defer cfcps.deinit(); var buf: [3]u21 = undefined; diff --git a/src/CombiningData.zig b/src/CombiningData.zig index fd64a3b..f58e0de 100644 --- a/src/CombiningData.zig +++ b/src/CombiningData.zig @@ -6,11 +6,9 @@ s2: []u8 = undefined, const CombiningData = @This(); pub fn init(allocator: mem.Allocator) !CombiningData { - const decompressor = compress.flate.inflate.decompressor; const in_bytes = @embedFile("ccc"); var in_fbs = std.io.fixedBufferStream(in_bytes); - var in_decomp = decompressor(.raw, in_fbs.reader()); - var reader = in_decomp.reader(); + var reader = in_fbs.reader(); const endian = builtin.cpu.arch.endian(); @@ -46,5 +44,4 @@ pub fn isStarter(cbdata: CombiningData, cp: u21) bool { const std = @import("std"); const builtin = @import("builtin"); -const compress = std.compress; const mem = std.mem; diff --git a/src/CompatData.zig b/src/CompatData.zig index 794abca..40ecd12 100644 --- a/src/CompatData.zig +++ b/src/CompatData.zig @@ -6,11 +6,9 @@ cps: []u21 = undefined, const CompatData = @This(); pub fn init(allocator: mem.Allocator) !CompatData { - const decompressor = compress.flate.inflate.decompressor; const in_bytes = @embedFile("compat"); var in_fbs = std.io.fixedBufferStream(in_bytes); - var in_decomp = decompressor(.raw, in_fbs.reader()); - var reader = in_decomp.reader(); + var reader = in_fbs.reader(); const endian = builtin.cpu.arch.endian(); var cpdata = CompatData{ @@ -55,6 +53,5 @@ pub fn toNfkd(cpdata: *const CompatData, cp: u21) []u21 { const std = @import("std"); const builtin = @import("builtin"); -const compress = std.compress; const mem = std.mem; const magic = @import("magic"); diff --git a/src/DisplayWidth.zig b/src/DisplayWidth.zig index 3da2d24..82b2649 100644 --- a/src/DisplayWidth.zig +++ b/src/DisplayWidth.zig @@ -39,11 +39,9 @@ pub fn setupWithGraphemes(dw: *DisplayWidth, allocator: Allocator, graphemes: Gr // Sets up the DisplayWidthData, leaving the GraphemeData undefined. pub fn setup(dw: *DisplayWidth, allocator: Allocator) Allocator.Error!void { - const decompressor = compress.flate.inflate.decompressor; const in_bytes = @embedFile("dwp"); var in_fbs = std.io.fixedBufferStream(in_bytes); - var in_decomp = decompressor(.raw, in_fbs.reader()); - var reader = in_decomp.reader(); + var reader = in_fbs.reader(); const endian = builtin.cpu.arch.endian(); @@ -400,7 +398,7 @@ pub fn wrap( columns: usize, threshold: usize, ) ![]u8 { - var result = ArrayList(u8).init(allocator); + var result = std.array_list.Managed(u8).init(allocator); defer result.deinit(); var line_iter = mem.tokenizeAny(u8, str, "\r\n"); @@ -460,8 +458,6 @@ test "allocation test" { const std = @import("std"); const builtin = @import("builtin"); const options = @import("options"); -const ArrayList = std.ArrayList; -const compress = std.compress; const mem = std.mem; const Allocator = mem.Allocator; const simd = std.simd; diff --git a/src/GeneralCategories.zig b/src/GeneralCategories.zig index 8c1b6a3..eee7e56 100644 --- a/src/GeneralCategories.zig +++ b/src/GeneralCategories.zig @@ -47,11 +47,9 @@ pub fn init(allocator: Allocator) Allocator.Error!GeneralCategories { } pub fn setup(gencat: *GeneralCategories, allocator: Allocator) Allocator.Error!void { - const decompressor = compress.flate.inflate.decompressor; const in_bytes = @embedFile("gencat"); var in_fbs = std.io.fixedBufferStream(in_bytes); - var in_decomp = decompressor(.raw, in_fbs.reader()); - var reader = in_decomp.reader(); + var reader = in_fbs.reader(); const endian = builtin.cpu.arch.endian(); diff --git a/src/Graphemes.zig b/src/Graphemes.zig index f1c56ed..81d874c 100644 --- a/src/Graphemes.zig +++ b/src/Graphemes.zig @@ -16,11 +16,9 @@ pub fn init(allocator: Allocator) Allocator.Error!Graphemes { } pub fn setup(graphemes: *Graphemes, allocator: Allocator) Allocator.Error!void { - const decompressor = compress.flate.inflate.decompressor; const in_bytes = @embedFile("gbp"); var in_fbs = std.io.fixedBufferStream(in_bytes); - var in_decomp = decompressor(.raw, in_fbs.reader()); - var reader = in_decomp.reader(); + var reader = in_fbs.reader(); const endian = builtin.cpu.arch.endian(); diff --git a/src/HangulData.zig b/src/HangulData.zig index 8c5f3ad..cae8b97 100644 --- a/src/HangulData.zig +++ b/src/HangulData.zig @@ -15,11 +15,9 @@ s2: []u3 = undefined, const Hangul = @This(); pub fn init(allocator: mem.Allocator) !Hangul { - const decompressor = compress.flate.inflate.decompressor; const in_bytes = @embedFile("hangul"); var in_fbs = std.io.fixedBufferStream(in_bytes); - var in_decomp = decompressor(.raw, in_fbs.reader()); - var reader = in_decomp.reader(); + var reader = in_fbs.reader(); const endian = builtin.cpu.arch.endian(); var hangul = Hangul{}; @@ -49,6 +47,5 @@ pub fn syllable(hangul: *const Hangul, cp: u21) Syllable { const std = @import("std"); const builtin = @import("builtin"); -const compress = std.compress; const mem = std.mem; const testing = std.testing; diff --git a/src/LetterCasing.zig b/src/LetterCasing.zig index 11a3e96..33096fc 100644 --- a/src/LetterCasing.zig +++ b/src/LetterCasing.zig @@ -22,7 +22,6 @@ pub fn setup(case: *LetterCasing, allocator: Allocator) Allocator.Error!void { } inline fn setupInner(self: *LetterCasing, allocator: mem.Allocator) !void { - const decompressor = compress.flate.inflate.decompressor; const endian = builtin.cpu.arch.endian(); self.case_map = try allocator.alloc([2]u21, 0x110000); @@ -36,8 +35,7 @@ inline fn setupInner(self: *LetterCasing, allocator: mem.Allocator) !void { // Uppercase const upper_bytes = @embedFile("upper"); var upper_fbs = std.io.fixedBufferStream(upper_bytes); - var upper_decomp = decompressor(.raw, upper_fbs.reader()); - var upper_reader = upper_decomp.reader(); + var upper_reader = upper_fbs.reader(); while (true) { const cp = try upper_reader.readInt(i24, endian); @@ -49,8 +47,7 @@ inline fn setupInner(self: *LetterCasing, allocator: mem.Allocator) !void { // Lowercase const lower_bytes = @embedFile("lower"); var lower_fbs = std.io.fixedBufferStream(lower_bytes); - var lower_decomp = decompressor(.raw, lower_fbs.reader()); - var lower_reader = lower_decomp.reader(); + var lower_reader = lower_fbs.reader(); while (true) { const cp = try lower_reader.readInt(i24, endian); @@ -62,8 +59,7 @@ inline fn setupInner(self: *LetterCasing, allocator: mem.Allocator) !void { // Case properties const cp_bytes = @embedFile("case_prop"); var cp_fbs = std.io.fixedBufferStream(cp_bytes); - var cp_decomp = decompressor(.raw, cp_fbs.reader()); - var cp_reader = cp_decomp.reader(); + var cp_reader = cp_fbs.reader(); const stage_1_len: u16 = try cp_reader.readInt(u16, endian); self.prop_s1 = try allocator.alloc(u16, stage_1_len); @@ -122,7 +118,7 @@ pub fn toUpperStr( allocator: mem.Allocator, str: []const u8, ) ![]u8 { - var bytes = std.ArrayList(u8).init(allocator); + var bytes = std.array_list.Managed(u8).init(allocator); defer bytes.deinit(); var iter = CodePointIterator{ .bytes = str }; @@ -180,7 +176,7 @@ pub fn toLowerStr( allocator: mem.Allocator, str: []const u8, ) ![]u8 { - var bytes = std.ArrayList(u8).init(allocator); + var bytes = std.array_list.Managed(u8).init(allocator); defer bytes.deinit(); var iter = CodePointIterator{ .bytes = str }; diff --git a/src/NormPropsData.zig b/src/NormPropsData.zig index ca69569..7b53542 100644 --- a/src/NormPropsData.zig +++ b/src/NormPropsData.zig @@ -6,11 +6,9 @@ s2: []u4 = undefined, const NormProps = @This(); pub fn init(allocator: mem.Allocator) !NormProps { - const decompressor = compress.flate.inflate.decompressor; const in_bytes = @embedFile("normp"); var in_fbs = std.io.fixedBufferStream(in_bytes); - var in_decomp = decompressor(.raw, in_fbs.reader()); - var reader = in_decomp.reader(); + var reader = in_fbs.reader(); const endian = builtin.cpu.arch.endian(); var norms = NormProps{}; @@ -50,6 +48,5 @@ pub fn isFcx(norms: *const NormProps, cp: u21) bool { const std = @import("std"); const builtin = @import("builtin"); -const compress = std.compress; const mem = std.mem; const testing = std.testing; diff --git a/src/Normalize.zig b/src/Normalize.zig index 989ec29..4a1bae8 100644 --- a/src/Normalize.zig +++ b/src/Normalize.zig @@ -305,7 +305,7 @@ pub fn nfkd(self: Normalize, allocator: Allocator, str: []const u8) Allocator.Er } pub fn nfxdCodePoints(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allocator.Error![]u21 { - var dcp_list = std.ArrayList(u21).init(allocator); + var dcp_list = std.array_list.Managed(u21).init(allocator); defer dcp_list.deinit(); var cp_iter = CodePointIterator{ .bytes = str }; @@ -332,7 +332,7 @@ fn nfxd(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allo const dcps = try self.nfxdCodePoints(allocator, str, form); defer allocator.free(dcps); - var dstr_list = std.ArrayList(u8).init(allocator); + var dstr_list = std.array_list.Managed(u8).init(allocator); defer dstr_list.deinit(); var buf: [4]u8 = undefined; @@ -393,7 +393,7 @@ pub fn nfdCodePoints( allocator: Allocator, cps: []const u21, ) Allocator.Error![]u21 { - var dcp_list = std.ArrayList(u21).init(allocator); + var dcp_list = std.array_list.Managed(u21).init(allocator); defer dcp_list.deinit(); var dc_buf: [18]u21 = undefined; @@ -418,7 +418,7 @@ pub fn nfkdCodePoints( allocator: Allocator, cps: []const u21, ) Allocator.Error![]u21 { - var dcp_list = std.ArrayList(u21).init(allocator); + var dcp_list = std.array_list.Managed(u21).init(allocator); defer dcp_list.deinit(); var dc_buf: [18]u21 = undefined; @@ -560,7 +560,7 @@ fn nfxc(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allo // If we have no deletions. the code point sequence // has been fully composed. if (deleted == 0) { - var cstr_list = std.ArrayList(u8).init(allocator); + var cstr_list = std.array_list.Managed(u8).init(allocator); defer cstr_list.deinit(); var buf: [4]u8 = undefined; diff --git a/src/Properties.zig b/src/Properties.zig index 73602a0..432d176 100644 --- a/src/Properties.zig +++ b/src/Properties.zig @@ -25,14 +25,12 @@ pub fn setup(props: *Properties, allocator: Allocator) Allocator.Error!void { } inline fn setupInner(props: *Properties, allocator: Allocator) !void { - const decompressor = compress.flate.inflate.decompressor; const endian = builtin.cpu.arch.endian(); // Process DerivedCoreProperties.txt const core_bytes = @embedFile("core_props"); var core_fbs = std.io.fixedBufferStream(core_bytes); - var core_decomp = decompressor(.raw, core_fbs.reader()); - var core_reader = core_decomp.reader(); + var core_reader = core_fbs.reader(); const core_stage_1_len: u16 = try core_reader.readInt(u16, endian); props.core_s1 = try allocator.alloc(u16, core_stage_1_len); @@ -47,8 +45,7 @@ inline fn setupInner(props: *Properties, allocator: Allocator) !void { // Process PropList.txt const props_bytes = @embedFile("props"); var props_fbs = std.io.fixedBufferStream(props_bytes); - var props_decomp = decompressor(.raw, props_fbs.reader()); - var props_reader = props_decomp.reader(); + var props_reader = props_fbs.reader(); const stage_1_len: u16 = try props_reader.readInt(u16, endian); props.props_s1 = try allocator.alloc(u16, stage_1_len); @@ -63,8 +60,7 @@ inline fn setupInner(props: *Properties, allocator: Allocator) !void { // Process DerivedNumericType.txt const num_bytes = @embedFile("numeric"); var num_fbs = std.io.fixedBufferStream(num_bytes); - var num_decomp = decompressor(.raw, num_fbs.reader()); - var num_reader = num_decomp.reader(); + var num_reader = num_fbs.reader(); const num_stage_1_len: u16 = try num_reader.readInt(u16, endian); props.num_s1 = try allocator.alloc(u16, num_stage_1_len); diff --git a/src/Scripts.zig b/src/Scripts.zig index 3bc90bc..719b01f 100644 --- a/src/Scripts.zig +++ b/src/Scripts.zig @@ -196,11 +196,9 @@ pub fn setup(scripts: *Scripts, allocator: Allocator) Allocator.Error!void { } inline fn setupInner(scripts: *Scripts, allocator: mem.Allocator) !void { - const decompressor = compress.flate.inflate.decompressor; const in_bytes = @embedFile("scripts"); var in_fbs = std.io.fixedBufferStream(in_bytes); - var in_decomp = decompressor(.raw, in_fbs.reader()); - var reader = in_decomp.reader(); + var reader = in_fbs.reader(); const endian = builtin.cpu.arch.endian(); @@ -250,7 +248,6 @@ test "Allocation failure" { const std = @import("std"); const builtin = @import("builtin"); -const compress = std.compress; const mem = std.mem; const Allocator = mem.Allocator; const testing = std.testing; diff --git a/src/Words.zig b/src/Words.zig index 617c34d..ce3203f 100644 --- a/src/Words.zig +++ b/src/Words.zig @@ -605,11 +605,9 @@ const SneakIterator = struct { }; inline fn setupImpl(wb: *Words, allocator: Allocator) !void { - const decompressor = compress.flate.inflate.decompressor; const in_bytes = @embedFile("wbp"); var in_fbs = std.io.fixedBufferStream(in_bytes); - var in_decomp = decompressor(.raw, in_fbs.reader()); - var reader = in_decomp.reader(); + var reader = in_fbs.reader(); const endian = builtin.cpu.arch.endian(); diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig index ae177a9..ff49b2a 100644 --- a/src/unicode_tests.zig +++ b/src/unicode_tests.zig @@ -3,35 +3,34 @@ const dbg_print = false; test "Unicode normalization tests" { var arena = heap.ArenaAllocator.init(testing.allocator); defer arena.deinit(); - var allocator = arena.allocator(); + const allocator = arena.allocator(); const n = try Normalize.init(allocator); defer n.deinit(allocator); var file = try fs.cwd().openFile("data/unicode/NormalizationTest.txt", .{}); defer file.close(); - var buf_reader = io.bufferedReader(file.reader()); - var input_stream = buf_reader.reader(); - var buf: [4096]u8 = undefined; + var reader = file.reader(&buf); + var cp_buf: [4]u8 = undefined; - var line_iter: IterRead = .{ .read = &input_stream }; + var line_iter: IterRead = .{ .read = &reader.interface }; - while (try line_iter.next(&buf)) |line| { + while (line_iter.next()) |line| { // Iterate over fields. var fields = mem.splitScalar(u8, line, ';'); var field_index: usize = 0; var input: []u8 = undefined; - defer allocator.free(input); - + if (dbg_print) std.debug.print("Line: {s}\n", .{line}); while (fields.next()) |field| : (field_index += 1) { if (field_index == 0) { - var i_buf = std.ArrayList(u8).init(allocator); + var i_buf = std.array_list.Managed(u8).init(allocator); defer i_buf.deinit(); var i_fields = mem.splitScalar(u8, field, ' '); while (i_fields.next()) |s| { + if (dbg_print) std.debug.print("Debug: {s}\n", .{s}); const icp = try fmt.parseInt(u21, s, 16); const len = try unicode.utf8Encode(icp, &cp_buf); try i_buf.appendSlice(cp_buf[0..len]); @@ -41,7 +40,7 @@ test "Unicode normalization tests" { } else if (field_index == 1) { if (dbg_print) debug.print("\n*** {s} ***\n", .{line}); // NFC, time to test. - var w_buf = std.ArrayList(u8).init(allocator); + var w_buf = std.array_list.Managed(u8).init(allocator); defer w_buf.deinit(); var w_fields = mem.splitScalar(u8, field, ' '); @@ -58,7 +57,7 @@ test "Unicode normalization tests" { try testing.expectEqualStrings(want, got.slice); } else if (field_index == 2) { // NFD, time to test. - var w_buf = std.ArrayList(u8).init(allocator); + var w_buf = std.array_list.Managed(u8).init(allocator); defer w_buf.deinit(); var w_fields = mem.splitScalar(u8, field, ' '); @@ -75,7 +74,7 @@ test "Unicode normalization tests" { try testing.expectEqualStrings(want, got.slice); } else if (field_index == 3) { // NFKC, time to test. - var w_buf = std.ArrayList(u8).init(allocator); + var w_buf = std.array_list.Managed(u8).init(allocator); defer w_buf.deinit(); var w_fields = mem.splitScalar(u8, field, ' '); @@ -92,7 +91,7 @@ test "Unicode normalization tests" { try testing.expectEqualStrings(want, got.slice); } else if (field_index == 4) { // NFKD, time to test. - var w_buf = std.ArrayList(u8).init(allocator); + var w_buf = std.array_list.Managed(u8).init(allocator); defer w_buf.deinit(); var w_fields = mem.splitScalar(u8, field, ' '); @@ -111,6 +110,11 @@ test "Unicode normalization tests" { continue; } } + } else |err| switch (err) { + error.EndOfStream => {}, + else => { + return err; + }, } } @@ -118,26 +122,25 @@ test "Segmentation GraphemeIterator" { const allocator = std.testing.allocator; var file = try std.fs.cwd().openFile("data/unicode/auxiliary/GraphemeBreakTest.txt", .{}); defer file.close(); - var buf_reader = std.io.bufferedReader(file.reader()); - var input_stream = buf_reader.reader(); + var buf: [4096]u8 = undefined; + var reader = file.reader(&buf); const graph = try Graphemes.init(allocator); defer graph.deinit(allocator); - var buf: [4096]u8 = undefined; - var line_iter: IterRead = .{ .read = &input_stream }; + var line_iter: IterRead = .{ .read = &reader.interface }; - while (try line_iter.next(&buf)) |raw| { + while (line_iter.next()) |raw| { // Clean up. var line = std.mem.trimLeft(u8, raw, "÷ "); if (std.mem.indexOf(u8, line, " ÷\t")) |final| { line = line[0..final]; } // Iterate over fields. - var want = std.ArrayList(Grapheme).init(allocator); + var want = std.array_list.Managed(Grapheme).init(allocator); defer want.deinit(); - var all_bytes = std.ArrayList(u8).init(allocator); + var all_bytes = std.array_list.Managed(u8).init(allocator); defer all_bytes.deinit(); var graphemes = std.mem.splitSequence(u8, line, " ÷ "); @@ -250,6 +253,11 @@ test "Segmentation GraphemeIterator" { } } } + } else |err| switch (err) { + error.EndOfStream => {}, + else => { + return err; + }, } } @@ -257,26 +265,25 @@ test "Segmentation Word Iterator" { const allocator = std.testing.allocator; var file = try std.fs.cwd().openFile("data/unicode/auxiliary/WordBreakTest.txt", .{}); defer file.close(); - var buf_reader = std.io.bufferedReader(file.reader()); - var input_stream = buf_reader.reader(); + var buf: [4096]u8 = undefined; + var reader = file.reader(&buf); const wb = try Words.init(allocator); defer wb.deinit(allocator); - var buf: [4096]u8 = undefined; - var line_iter: IterRead = .{ .read = &input_stream }; + var line_iter: IterRead = .{ .read = &reader.interface }; - while (try line_iter.next(&buf)) |raw| { + while (line_iter.next()) |raw| { // Clean up. var line = std.mem.trimLeft(u8, raw, "÷ "); if (std.mem.indexOf(u8, line, " ÷\t")) |final| { line = line[0..final]; } // Iterate over fields. - var want = std.ArrayList(Word).init(allocator); + var want = std.array_list.Managed(Word).init(allocator); defer want.deinit(); - var all_bytes = std.ArrayList(u8).init(allocator); + var all_bytes = std.array_list.Managed(u8).init(allocator); defer all_bytes.deinit(); var words = std.mem.splitSequence(u8, line, " ÷ "); @@ -439,26 +446,26 @@ test "Segmentation Word Iterator" { if (idx == 0) break; } } + } else |err| switch (err) { + error.EndOfStream => {}, + else => { + return err; + }, } } const IterRead = struct { - read: *Reader, + read: *io.Reader, line: usize = 0, - pub fn next(iter: *IterRead, buf: []u8) !?[]const u8 { - defer iter.line += 1; - const maybe_line = try iter.read.readUntilDelimiterOrEof(buf, '#'); - if (maybe_line) |this_line| { - try iter.read.skipUntilDelimiterOrEof('\n'); - if (this_line.len == 0 or this_line[0] == '@') { - // comment, next line - return iter.next(buf); - } else { - return this_line; - } + pub fn next(iter: *IterRead) anyerror![]const u8 { + iter.line += 1; + const this_line = try iter.read.takeDelimiterExclusive('\n'); + if (this_line.len == 0 or this_line[0] == '@' or this_line[0] == '#') { + // comment, next line + return iter.next(); } else { - return null; + return this_line; } } }; @@ -467,7 +474,6 @@ const std = @import("std"); const fmt = std.fmt; const fs = std.fs; const io = std.io; -const Reader = io.BufferedReader(4096, fs.File.Reader).Reader; const heap = std.heap; const mem = std.mem; const debug = std.debug; -- cgit v1.2.3 From 749197a3f9d25e211615960c02380a3d659b20f9 Mon Sep 17 00:00:00 2001 From: Michael Chaten Date: Sun, 14 Sep 2025 04:11:09 -0700 Subject: Embed data files in scripts rather than relying on filesystem access for easier packaging --- src/unicode_tests.zig | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig index ff49b2a..875c5f0 100644 --- a/src/unicode_tests.zig +++ b/src/unicode_tests.zig @@ -8,14 +8,10 @@ test "Unicode normalization tests" { const n = try Normalize.init(allocator); defer n.deinit(allocator); - var file = try fs.cwd().openFile("data/unicode/NormalizationTest.txt", .{}); - defer file.close(); - var buf: [4096]u8 = undefined; - var reader = file.reader(&buf); - + var reader = std.io.Reader.fixed(@embedFile("NormalizationTest.txt")); var cp_buf: [4]u8 = undefined; - var line_iter: IterRead = .{ .read = &reader.interface }; + var line_iter: IterRead = .{ .read = &reader }; while (line_iter.next()) |line| { // Iterate over fields. @@ -120,15 +116,12 @@ test "Unicode normalization tests" { test "Segmentation GraphemeIterator" { const allocator = std.testing.allocator; - var file = try std.fs.cwd().openFile("data/unicode/auxiliary/GraphemeBreakTest.txt", .{}); - defer file.close(); - var buf: [4096]u8 = undefined; - var reader = file.reader(&buf); + var reader = std.io.Reader.fixed(@embedFile("GraphemeBreakTest.txt")); const graph = try Graphemes.init(allocator); defer graph.deinit(allocator); - var line_iter: IterRead = .{ .read = &reader.interface }; + var line_iter: IterRead = .{ .read = &reader }; while (line_iter.next()) |raw| { // Clean up. @@ -263,15 +256,11 @@ test "Segmentation GraphemeIterator" { test "Segmentation Word Iterator" { const allocator = std.testing.allocator; - var file = try std.fs.cwd().openFile("data/unicode/auxiliary/WordBreakTest.txt", .{}); - defer file.close(); - var buf: [4096]u8 = undefined; - var reader = file.reader(&buf); - + var reader = std.io.Reader.fixed(@embedFile("WordBreakTest.txt")); const wb = try Words.init(allocator); defer wb.deinit(allocator); - var line_iter: IterRead = .{ .read = &reader.interface }; + var line_iter: IterRead = .{ .read = &reader }; while (line_iter.next()) |raw| { // Clean up. -- cgit v1.2.3 From 2f02c3b16c073d0bd3d9368a66ce272a574f75a3 Mon Sep 17 00:00:00 2001 From: Jay Date: Sat, 8 Nov 2025 19:59:08 +1100 Subject: Use takeDelimiterInclusive to support Zig 0.15.2 --- src/unicode_tests.zig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig index 875c5f0..e2a5a96 100644 --- a/src/unicode_tests.zig +++ b/src/unicode_tests.zig @@ -449,7 +449,8 @@ const IterRead = struct { pub fn next(iter: *IterRead) anyerror![]const u8 { iter.line += 1; - const this_line = try iter.read.takeDelimiterExclusive('\n'); + const took = try iter.read.takeDelimiterInclusive('\n'); + const this_line = std.mem.trimRight(u8, took, "\n"); if (this_line.len == 0 or this_line[0] == '@' or this_line[0] == '#') { // comment, next line return iter.next(); -- cgit v1.2.3 From 041afd58de8525dc0b6f6a9e2b493031dbf4bbee Mon Sep 17 00:00:00 2001 From: Sam Atman Date: Tue, 23 Dec 2025 10:22:06 -0500 Subject: Fix #74: Check for characters before popping in wrap --- src/DisplayWidth.zig | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/DisplayWidth.zig b/src/DisplayWidth.zig index 82b2649..d15dbae 100644 --- a/src/DisplayWidth.zig +++ b/src/DisplayWidth.zig @@ -420,8 +420,10 @@ pub fn wrap( } // Remove trailing space and newline. - _ = result.pop(); - _ = result.pop(); + if (result.items[result.items.len - 1] == '\n') + _ = result.pop(); + if (result.items[result.items.len - 1] == ' ') + _ = result.pop(); return try result.toOwnedSlice(); } @@ -438,6 +440,18 @@ test "wrap" { try testing.expectEqualStrings(want, got); } +test "zg/74" { + var debug_alloc = std.heap.DebugAllocator(.{}).init; + const allocator = debug_alloc.allocator(); + defer _ = debug_alloc.deinit(); + const dw = try DisplayWidth.init(allocator); + defer dw.deinit(allocator); + const wrapped = try dw.wrap(allocator, "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam pellentesque pulvinar felis, sit amet commodo ligula feugiat sed. Sed quis malesuada elit, nec eleifend lectus. Sed tincidunt finibus aliquet. Praesent consectetur nibh libero, tempus imperdiet lorem congue eget.", 16, 1); + defer allocator.free(wrapped); + const expected_wrap = "Lorem ipsum dolor \nsit amet, consectetur \nadipiscing elit. \nNullam pellentesque \npulvinar felis, \nsit amet commodo \nligula feugiat \nsed. Sed quis malesuada \nelit, nec eleifend \nlectus. Sed tincidunt \nfinibus aliquet. \nPraesent consectetur \nnibh libero, tempus \nimperdiet lorem \ncongue eget."; + try std.testing.expectEqualStrings(expected_wrap, wrapped); +} + fn testAllocation(allocator: Allocator) !void { { var dw = try DisplayWidth.init(allocator); -- cgit v1.2.3 From 9f725580a2b6c93825edeff27f06951f57bcc237 Mon Sep 17 00:00:00 2001 From: Sam Atman Date: Tue, 23 Dec 2025 11:04:01 -0500 Subject: Use width 2 when skin tone modifier detected Fix: #82 --- src/DisplayWidth.zig | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src') diff --git a/src/DisplayWidth.zig b/src/DisplayWidth.zig index d15dbae..9919a55 100644 --- a/src/DisplayWidth.zig +++ b/src/DisplayWidth.zig @@ -126,6 +126,8 @@ pub fn strWidth(dw: DisplayWidth, str: []const u8) usize { // emoji text sequence. if (ncp.code == 0xFE0E) w = 1; if (ncp.code == 0xFE0F) w = 2; + // Skin tones + if (0x1F3FB <= ncp.code and ncp.code <= 0x1F3FF) w = 2; } // Only adding width of first non-zero-width code point. @@ -201,6 +203,9 @@ test "strWidth" { try testing.expectEqual(@as(usize, 9), dw.strWidth("Ẓ̌á̲l͔̝̞̄̑͌g̖̘̘̔̔͢͞͝o̪̔T̢̙̫̈̍͞e̬͈͕͌̏͑x̺̍ṭ̓̓ͅ")); try testing.expectEqual(@as(usize, 17), dw.strWidth("슬라바 우크라이나")); try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{378}")); + + // https://codeberg.org/atman/zg/issues/82 + try testing.expectEqual(@as(usize, 12), dw.strWidth("✍️✍🏻✍🏼✍🏽✍🏾✍🏿")); } /// centers `str` in a new string of width `total_width` (in display cells) using `pad` as padding. -- cgit v1.2.3