From a3b5e884b12fdaa341010ef41bb9382fa0cd89f8 Mon Sep 17 00:00:00 2001 From: Michael Chaten Date: Sat, 13 Sep 2025 08:38:24 -0700 Subject: Update codebase to Zig 0.15.1. Removes compression support --- codegen/gbp.zig | 57 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 24 deletions(-) (limited to 'codegen/gbp.zig') diff --git a/codegen/gbp.zig b/codegen/gbp.zig index 3fc4461..d654cf1 100644 --- a/codegen/gbp.zig +++ b/codegen/gbp.zig @@ -47,7 +47,7 @@ const BlockMap = std.HashMap( std.hash_map.default_max_load_percentage, ); -pub fn main() !void { +pub fn main() anyerror!void { var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); defer arena.deinit(); const allocator = arena.allocator(); @@ -66,10 +66,9 @@ pub fn main() !void { // Process Indic var indic_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{}); defer indic_file.close(); - var indic_buf = std.io.bufferedReader(indic_file.reader()); - const indic_reader = indic_buf.reader(); + var indic_reader = indic_file.reader(&line_buf); - while (try indic_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { + while (indic_reader.interface.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; if (std.mem.indexOf(u8, line, "InCB") == null) continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; @@ -100,15 +99,18 @@ pub fn main() !void { else => {}, } } + } else |err| switch (err) { + error.EndOfStream => {}, + else => { + return err; + }, } - // Process GBP var gbp_file = try std.fs.cwd().openFile("data/unicode/auxiliary/GraphemeBreakProperty.txt", .{}); defer gbp_file.close(); - var gbp_buf = std.io.bufferedReader(gbp_file.reader()); - const gbp_reader = gbp_buf.reader(); + var gbp_reader = gbp_file.reader(&line_buf); - while (try gbp_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { + while (gbp_reader.interface.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; @@ -138,15 +140,18 @@ pub fn main() !void { else => {}, } } + } else |err| switch (err) { + error.EndOfStream => {}, + else => { + return err; + }, } - // Process Emoji var emoji_file = try std.fs.cwd().openFile("data/unicode/emoji/emoji-data.txt", .{}); defer emoji_file.close(); - var emoji_buf = std.io.bufferedReader(emoji_file.reader()); - const emoji_reader = emoji_buf.reader(); + var emoji_reader = emoji_file.reader(&line_buf); - while (try emoji_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { + while (emoji_reader.interface.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; if (std.mem.indexOf(u8, line, "Extended_Pictographic") == null) continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; @@ -170,15 +175,20 @@ pub fn main() !void { else => {}, } } + } else |err| switch (err) { + error.EndOfStream => {}, + else => { + return err; + }, } var blocks_map = BlockMap.init(allocator); defer blocks_map.deinit(); - var stage1 = std.ArrayList(u16).init(allocator); + var stage1 = std.array_list.Managed(u16).init(allocator); defer stage1.deinit(); - var stage2 = std.ArrayList(u16).init(allocator); + var stage2 = std.array_list.Managed(u16).init(allocator); defer stage2.deinit(); var stage3 = std.AutoArrayHashMap(u8, u16).init(allocator); @@ -227,22 +237,21 @@ pub fn main() !void { _ = args_iter.skip(); const output_path = args_iter.next() orelse @panic("No output file arg!"); - const compressor = std.compress.flate.deflate.compressor; + var write_buf: [4096]u8 = undefined; var out_file = try std.fs.cwd().createFile(output_path, .{}); defer out_file.close(); - var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); - const writer = out_comp.writer(); + var writer = out_file.writer(&write_buf); const endian = builtin.cpu.arch.endian(); - try writer.writeInt(u16, @intCast(stage1.items.len), endian); - for (stage1.items) |i| try writer.writeInt(u16, i, endian); + try writer.interface.writeInt(u16, @intCast(stage1.items.len), endian); + for (stage1.items) |i| try writer.interface.writeInt(u16, i, endian); - try writer.writeInt(u16, @intCast(stage2.items.len), endian); - for (stage2.items) |i| try writer.writeInt(u16, i, endian); + try writer.interface.writeInt(u16, @intCast(stage2.items.len), endian); + for (stage2.items) |i| try writer.interface.writeInt(u16, i, endian); const props_bytes = stage3.keys(); - try writer.writeInt(u16, @intCast(props_bytes.len), endian); - try writer.writeAll(props_bytes); + try writer.interface.writeInt(u16, @intCast(props_bytes.len), endian); + try writer.interface.writeAll(props_bytes); - try out_comp.flush(); + try writer.interface.flush(); } -- cgit v1.2.3 From 749197a3f9d25e211615960c02380a3d659b20f9 Mon Sep 17 00:00:00 2001 From: Michael Chaten Date: Sun, 14 Sep 2025 04:11:09 -0700 Subject: Embed data files in scripts rather than relying on filesystem access for easier packaging --- codegen/gbp.zig | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) (limited to 'codegen/gbp.zig') diff --git a/codegen/gbp.zig b/codegen/gbp.zig index d654cf1..895aa7a 100644 --- a/codegen/gbp.zig +++ b/codegen/gbp.zig @@ -61,14 +61,11 @@ pub fn main() anyerror!void { var emoji_set = std.AutoHashMap(u21, void).init(allocator); defer emoji_set.deinit(); - var line_buf: [4096]u8 = undefined; - // Process Indic - var indic_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{}); - defer indic_file.close(); - var indic_reader = indic_file.reader(&line_buf); + const indic_file = @embedFile("DerivedCoreProperties.txt"); + var indic_reader = std.io.Reader.fixed(indic_file); - while (indic_reader.interface.takeDelimiterExclusive('\n')) |line| { + while (indic_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; if (std.mem.indexOf(u8, line, "InCB") == null) continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; @@ -106,11 +103,10 @@ pub fn main() anyerror!void { }, } // Process GBP - var gbp_file = try std.fs.cwd().openFile("data/unicode/auxiliary/GraphemeBreakProperty.txt", .{}); - defer gbp_file.close(); - var gbp_reader = gbp_file.reader(&line_buf); - while (gbp_reader.interface.takeDelimiterExclusive('\n')) |line| { + var gbp_reader = std.io.Reader.fixed(@embedFile("GraphemeBreakProperty.txt")); + + while (gbp_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; @@ -147,11 +143,10 @@ pub fn main() anyerror!void { }, } // Process Emoji - var emoji_file = try std.fs.cwd().openFile("data/unicode/emoji/emoji-data.txt", .{}); - defer emoji_file.close(); - var emoji_reader = emoji_file.reader(&line_buf); - while (emoji_reader.interface.takeDelimiterExclusive('\n')) |line| { + var emoji_reader = std.io.Reader.fixed(@embedFile("emoji-data.txt")); + + while (emoji_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; if (std.mem.indexOf(u8, line, "Extended_Pictographic") == null) continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; -- cgit v1.2.3 From 2f02c3b16c073d0bd3d9368a66ce272a574f75a3 Mon Sep 17 00:00:00 2001 From: Jay Date: Sat, 8 Nov 2025 19:59:08 +1100 Subject: Use takeDelimiterInclusive to support Zig 0.15.2 --- codegen/gbp.zig | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'codegen/gbp.zig') diff --git a/codegen/gbp.zig b/codegen/gbp.zig index 895aa7a..1d06e9a 100644 --- a/codegen/gbp.zig +++ b/codegen/gbp.zig @@ -65,7 +65,8 @@ pub fn main() anyerror!void { const indic_file = @embedFile("DerivedCoreProperties.txt"); var indic_reader = std.io.Reader.fixed(indic_file); - while (indic_reader.takeDelimiterExclusive('\n')) |line| { + while (indic_reader.takeDelimiterInclusive('\n')) |took| { + const line = std.mem.trimRight(u8, took, "\n"); if (line.len == 0 or line[0] == '#') continue; if (std.mem.indexOf(u8, line, "InCB") == null) continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; @@ -106,7 +107,8 @@ pub fn main() anyerror!void { var gbp_reader = std.io.Reader.fixed(@embedFile("GraphemeBreakProperty.txt")); - while (gbp_reader.takeDelimiterExclusive('\n')) |line| { + while (gbp_reader.takeDelimiterInclusive('\n')) |took| { + const line = std.mem.trimRight(u8, took, "\n"); if (line.len == 0 or line[0] == '#') continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; @@ -146,7 +148,8 @@ pub fn main() anyerror!void { var emoji_reader = std.io.Reader.fixed(@embedFile("emoji-data.txt")); - while (emoji_reader.takeDelimiterExclusive('\n')) |line| { + while (emoji_reader.takeDelimiterInclusive('\n')) |took| { + const line = std.mem.trimRight(u8, took, "\n"); if (line.len == 0 or line[0] == '#') continue; if (std.mem.indexOf(u8, line, "Extended_Pictographic") == null) continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; -- cgit v1.2.3