From 749197a3f9d25e211615960c02380a3d659b20f9 Mon Sep 17 00:00:00 2001 From: Michael Chaten Date: Sun, 14 Sep 2025 04:11:09 -0700 Subject: Embed data files in scripts rather than relying on filesystem access for easier packaging --- codegen/canon.zig | 8 ++------ codegen/case_prop.zig | 9 ++------- codegen/ccc.zig | 9 ++------- codegen/compat.zig | 7 ++----- codegen/core_props.zig | 9 ++------- codegen/dwp.zig | 14 ++++---------- codegen/fold.zig | 13 ++++--------- codegen/gbp.zig | 23 +++++++++-------------- codegen/gencat.zig | 9 ++------- codegen/hangul.zig | 9 ++------- codegen/lower.zig | 8 ++------ codegen/normp.zig | 9 ++------- codegen/numeric.zig | 9 ++------- codegen/props.zig | 9 ++------- codegen/scripts.zig | 11 +++-------- codegen/upper.zig | 11 ++--------- codegen/wbp.zig | 10 +++------- 17 files changed, 47 insertions(+), 130 deletions(-) (limited to 'codegen') diff --git a/codegen/canon.zig b/codegen/canon.zig index 89a9070..34e720f 100644 --- a/codegen/canon.zig +++ b/codegen/canon.zig @@ -6,13 +6,9 @@ pub fn main() anyerror!void { defer arena.deinit(); const allocator = arena.allocator(); - var line_buf: [4096]u8 = undefined; var write_buf: [4096]u8 = undefined; // Process UnicodeData.txt - var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{}); - defer in_file.close(); - var in_reader = in_file.reader(&line_buf); - + var in_reader = std.io.Reader.fixed(@embedFile("UnicodeData.txt")); var args_iter = try std.process.argsWithAllocator(allocator); defer args_iter.deinit(); _ = args_iter.skip(); @@ -24,7 +20,7 @@ pub fn main() anyerror!void { var writer = &file_writer.interface; const endian = builtin.cpu.arch.endian(); - lines: while (in_reader.interface.takeDelimiterInclusive('\n')) |line| { + lines: while (in_reader.takeDelimiterInclusive('\n')) |line| { if (line.len == 0) continue; var field_iter = std.mem.splitScalar(u8, line, ';'); diff --git a/codegen/case_prop.zig b/codegen/case_prop.zig index 66eb62c..46b8e65 100644 --- a/codegen/case_prop.zig +++ b/codegen/case_prop.zig @@ -30,14 +30,9 @@ pub fn main() anyerror!void { var flat_map = std.AutoHashMap(u21, u8).init(allocator); defer flat_map.deinit(); - var line_buf: [4096]u8 = undefined; - // Process DerivedCoreProperties.txt - var in_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{}); - defer in_file.close(); - var in_reader = in_file.reader(&line_buf); - - while (in_reader.interface.takeDelimiterExclusive('\n')) |line| { + var in_reader = std.io.Reader.fixed(@embedFile("DerivedCoreProperties.txt")); + while (in_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; diff --git a/codegen/ccc.zig b/codegen/ccc.zig index b9b2bc3..48d3a9d 100644 --- a/codegen/ccc.zig +++ b/codegen/ccc.zig @@ -29,14 +29,9 @@ pub fn main() anyerror!void { var flat_map = std.AutoHashMap(u21, u8).init(allocator); defer flat_map.deinit(); - var line_buf: [4096]u8 = undefined; - // Process DerivedCombiningClass.txt - var cc_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedCombiningClass.txt", .{}); - defer cc_file.close(); - var cc_reader = cc_file.reader(&line_buf); - - while (cc_reader.interface.takeDelimiterExclusive('\n')) |line| { + var cc_reader = std.io.Reader.fixed(@embedFile("DerivedCombiningClass.txt")); + while (cc_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; diff --git a/codegen/compat.zig b/codegen/compat.zig index 835a636..0a06c44 100644 --- a/codegen/compat.zig +++ b/codegen/compat.zig @@ -7,12 +7,9 @@ pub fn main() anyerror!void { const allocator = arena.allocator(); // Process UnicodeData.txt - var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{}); - defer in_file.close(); - var line_buf: [4096]u8 = undefined; var write_buf: [4096]u8 = undefined; - var in_reader = in_file.reader(&line_buf); + var in_reader = std.io.Reader.fixed(@embedFile("UnicodeData.txt")); var args_iter = try std.process.argsWithAllocator(allocator); defer args_iter.deinit(); _ = args_iter.skip(); @@ -24,7 +21,7 @@ pub fn main() anyerror!void { const endian = builtin.cpu.arch.endian(); - lines: while (in_reader.interface.takeDelimiterExclusive('\n')) |line| { + lines: while (in_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0) continue; var field_iter = std.mem.splitScalar(u8, line, ';'); diff --git a/codegen/core_props.zig b/codegen/core_props.zig index 19063f6..d59a77e 100644 --- a/codegen/core_props.zig +++ b/codegen/core_props.zig @@ -30,14 +30,9 @@ pub fn main() anyerror!void { var flat_map = std.AutoHashMap(u21, u8).init(allocator); defer flat_map.deinit(); - var line_buf: [4096]u8 = undefined; - // Process DerivedCoreProperties.txt - var in_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{}); - defer in_file.close(); - var in_reader = in_file.reader(&line_buf); - - while (in_reader.interface.takeDelimiterExclusive('\n')) |line| { + var in_reader = std.io.Reader.fixed(@embedFile("DerivedCoreProperties.txt")); + while (in_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; diff --git a/codegen/dwp.zig b/codegen/dwp.zig index fad08d1..8189ad8 100644 --- a/codegen/dwp.zig +++ b/codegen/dwp.zig @@ -31,14 +31,10 @@ pub fn main() anyerror!void { var flat_map = std.AutoHashMap(u21, i4).init(allocator); defer flat_map.deinit(); - var line_buf: [4096]u8 = undefined; - // Process DerivedEastAsianWidth.txt - var deaw_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedEastAsianWidth.txt", .{}); - defer deaw_file.close(); - var deaw_reader = deaw_file.reader(&line_buf); + var deaw_reader = std.io.Reader.fixed(@embedFile("DerivedEastAsianWidth.txt")); - while (deaw_reader.interface.takeDelimiterExclusive('\n')) |line| { + while (deaw_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0) continue; // @missing ranges @@ -94,11 +90,9 @@ pub fn main() anyerror!void { }, } // Process DerivedGeneralCategory.txt - var dgc_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedGeneralCategory.txt", .{}); - defer dgc_file.close(); - var dgc_reader = dgc_file.reader(&line_buf); + var dgc_reader = std.io.Reader.fixed(@embedFile("DerivedGeneralCategory.txt")); - while (dgc_reader.interface.takeDelimiterExclusive('\n')) |line| { + while (dgc_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; diff --git a/codegen/fold.zig b/codegen/fold.zig index cc381a8..d927795 100644 --- a/codegen/fold.zig +++ b/codegen/fold.zig @@ -7,16 +7,13 @@ pub fn main() anyerror!void { defer std.debug.assert(gpa.deinit() == .ok); const allocator = gpa.allocator(); - var line_buf: [4096]u8 = undefined; // Process DerivedCoreProperties.txt - var props_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{}); - defer props_file.close(); - var props_reader = props_file.reader(&line_buf); + var props_reader = std.io.Reader.fixed(@embedFile("DerivedCoreProperties.txt")); var props_map = std.AutoHashMap(u21, void).init(allocator); defer props_map.deinit(); - props_lines: while (props_reader.interface.takeDelimiterExclusive('\n')) |line| { + props_lines: while (props_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; @@ -57,11 +54,9 @@ pub fn main() anyerror!void { defer codepoint_mapping.deinit(); // Process CaseFolding.txt - var cp_file = try std.fs.cwd().openFile("data/unicode/CaseFolding.txt", .{}); - defer cp_file.close(); - var cp_reader = cp_file.reader(&line_buf); - while (cp_reader.interface.takeDelimiterExclusive('\n')) |line| { + var cp_reader = std.io.Reader.fixed(@embedFile("CaseFolding.txt")); + while (cp_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; var field_it = std.mem.splitScalar(u8, line, ';'); diff --git a/codegen/gbp.zig b/codegen/gbp.zig index d654cf1..895aa7a 100644 --- a/codegen/gbp.zig +++ b/codegen/gbp.zig @@ -61,14 +61,11 @@ pub fn main() anyerror!void { var emoji_set = std.AutoHashMap(u21, void).init(allocator); defer emoji_set.deinit(); - var line_buf: [4096]u8 = undefined; - // Process Indic - var indic_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{}); - defer indic_file.close(); - var indic_reader = indic_file.reader(&line_buf); + const indic_file = @embedFile("DerivedCoreProperties.txt"); + var indic_reader = std.io.Reader.fixed(indic_file); - while (indic_reader.interface.takeDelimiterExclusive('\n')) |line| { + while (indic_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; if (std.mem.indexOf(u8, line, "InCB") == null) continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; @@ -106,11 +103,10 @@ pub fn main() anyerror!void { }, } // Process GBP - var gbp_file = try std.fs.cwd().openFile("data/unicode/auxiliary/GraphemeBreakProperty.txt", .{}); - defer gbp_file.close(); - var gbp_reader = gbp_file.reader(&line_buf); - while (gbp_reader.interface.takeDelimiterExclusive('\n')) |line| { + var gbp_reader = std.io.Reader.fixed(@embedFile("GraphemeBreakProperty.txt")); + + while (gbp_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; @@ -147,11 +143,10 @@ pub fn main() anyerror!void { }, } // Process Emoji - var emoji_file = try std.fs.cwd().openFile("data/unicode/emoji/emoji-data.txt", .{}); - defer emoji_file.close(); - var emoji_reader = emoji_file.reader(&line_buf); - while (emoji_reader.interface.takeDelimiterExclusive('\n')) |line| { + var emoji_reader = std.io.Reader.fixed(@embedFile("emoji-data.txt")); + + while (emoji_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; if (std.mem.indexOf(u8, line, "Extended_Pictographic") == null) continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; diff --git a/codegen/gencat.zig b/codegen/gencat.zig index 7dedb5d..79fa072 100644 --- a/codegen/gencat.zig +++ b/codegen/gencat.zig @@ -62,14 +62,9 @@ pub fn main() !void { var flat_map = std.AutoHashMap(u21, u5).init(allocator); defer flat_map.deinit(); - var line_buf: [4096]u8 = undefined; - // Process DerivedGeneralCategory.txt - var in_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedGeneralCategory.txt", .{}); - defer in_file.close(); - var in_reader = in_file.reader(&line_buf); - - while (in_reader.interface.takeDelimiterExclusive('\n')) |line| { + var in_reader = std.io.Reader.fixed(@embedFile("DerivedGeneralCategory.txt")); + while (in_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; diff --git a/codegen/hangul.zig b/codegen/hangul.zig index 6768b3f..64ab11c 100644 --- a/codegen/hangul.zig +++ b/codegen/hangul.zig @@ -38,14 +38,9 @@ pub fn main() anyerror!void { var flat_map = std.AutoHashMap(u21, u3).init(allocator); defer flat_map.deinit(); - var line_buf: [4096]u8 = undefined; - // Process HangulSyllableType.txt - var in_file = try std.fs.cwd().openFile("data/unicode/HangulSyllableType.txt", .{}); - defer in_file.close(); - var in_reader = in_file.reader(&line_buf); - - while (in_reader.interface.takeDelimiterExclusive('\n')) |line| { + var in_reader = std.io.Reader.fixed(@embedFile("HangulSyllableType.txt")); + while (in_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; diff --git a/codegen/lower.zig b/codegen/lower.zig index c11cb0a..987f004 100644 --- a/codegen/lower.zig +++ b/codegen/lower.zig @@ -6,13 +6,9 @@ pub fn main() !void { defer arena.deinit(); const allocator = arena.allocator(); - var line_buf: [4096]u8 = undefined; var write_buf: [4096]u8 = undefined; // Process UnicodeData.txt - var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{}); - defer in_file.close(); - var in_reader = in_file.reader(&line_buf); - + var in_reader = std.io.Reader.fixed(@embedFile("UnicodeData.txt")); var args_iter = try std.process.argsWithAllocator(allocator); defer args_iter.deinit(); _ = args_iter.skip(); @@ -24,7 +20,7 @@ pub fn main() !void { const endian = builtin.cpu.arch.endian(); - lines: while (in_reader.interface.takeDelimiterExclusive('\n')) |line| { + lines: while (in_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0) continue; var field_iter = std.mem.splitScalar(u8, line, ';'); diff --git a/codegen/normp.zig b/codegen/normp.zig index 71a1ae5..3cdd770 100644 --- a/codegen/normp.zig +++ b/codegen/normp.zig @@ -29,14 +29,9 @@ pub fn main() anyerror!void { var flat_map = std.AutoHashMap(u21, u3).init(allocator); defer flat_map.deinit(); - var line_buf: [4096]u8 = undefined; - // Process DerivedNormalizationProps.txt - var in_file = try std.fs.cwd().openFile("data/unicode/DerivedNormalizationProps.txt", .{}); - defer in_file.close(); - var in_reader = in_file.reader(&line_buf); - - while (in_reader.interface.takeDelimiterExclusive('\n')) |line| { + var in_reader = std.io.Reader.fixed(@embedFile("DerivedNormalizationProps.txt")); + while (in_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; diff --git a/codegen/numeric.zig b/codegen/numeric.zig index cf918e8..d6b3165 100644 --- a/codegen/numeric.zig +++ b/codegen/numeric.zig @@ -30,14 +30,9 @@ pub fn main() anyerror!void { var flat_map = std.AutoHashMap(u21, u8).init(allocator); defer flat_map.deinit(); - var line_buf: [4096]u8 = undefined; - // Process DerivedNumericType.txt - var in_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedNumericType.txt", .{}); - defer in_file.close(); - var in_reader = in_file.reader(&line_buf); - - while (in_reader.interface.takeDelimiterExclusive('\n')) |line| { + var in_reader = std.io.Reader.fixed(@embedFile("DerivedNumericType.txt")); + while (in_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; diff --git a/codegen/props.zig b/codegen/props.zig index 6ff0a33..ca42987 100644 --- a/codegen/props.zig +++ b/codegen/props.zig @@ -30,14 +30,9 @@ pub fn main() anyerror!void { var flat_map = std.AutoHashMap(u21, u8).init(allocator); defer flat_map.deinit(); - var line_buf: [4096]u8 = undefined; - // Process PropList.txt - var in_file = try std.fs.cwd().openFile("data/unicode/PropList.txt", .{}); - defer in_file.close(); - var in_reader = in_file.reader(&line_buf); - - while (in_reader.interface.takeDelimiterExclusive('\n')) |line| { + var in_reader = std.io.Reader.fixed(@embedFile("PropList.txt")); + while (in_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; diff --git a/codegen/scripts.zig b/codegen/scripts.zig index bdd3d9d..81511cc 100644 --- a/codegen/scripts.zig +++ b/codegen/scripts.zig @@ -203,14 +203,9 @@ pub fn main() anyerror!void { var flat_map = std.AutoHashMap(u21, u8).init(allocator); defer flat_map.deinit(); - var line_buf: [4096]u8 = undefined; - - // Process DerivedGeneralCategory.txt - var in_file = try std.fs.cwd().openFile("data/unicode/Scripts.txt", .{}); - defer in_file.close(); - var in_reader = in_file.reader(&line_buf); - - while (in_reader.interface.takeDelimiterExclusive('\n')) |line| { + // Process Scripts.txt + var in_reader = std.io.Reader.fixed(@embedFile("Scripts.txt")); + while (in_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; diff --git a/codegen/upper.zig b/codegen/upper.zig index 7d96a0e..108fdbd 100644 --- a/codegen/upper.zig +++ b/codegen/upper.zig @@ -6,13 +6,9 @@ pub fn main() anyerror!void { defer arena.deinit(); const allocator = arena.allocator(); - var line_buf: [4096]u8 = undefined; var write_buf: [4096]u8 = undefined; // Process UnicodeData.txt - var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{}); - defer in_file.close(); - var in_reader = in_file.reader(&line_buf); - + var in_reader = std.io.Reader.fixed(@embedFile("UnicodeData.txt")); var args_iter = try std.process.argsWithAllocator(allocator); defer args_iter.deinit(); _ = args_iter.skip(); @@ -23,7 +19,7 @@ pub fn main() anyerror!void { var file_writer = out_file.writer(&write_buf); const endian = builtin.cpu.arch.endian(); - lines: while (in_reader.interface.takeDelimiterExclusive('\n')) |line| { + lines: while (in_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0) continue; var field_iter = std.mem.splitScalar(u8, line, ';'); @@ -48,9 +44,6 @@ pub fn main() anyerror!void { } } } else |err| switch (err) { - error.ReadFailed => { - return in_reader.err orelse err; - }, error.EndOfStream => {}, else => { return err; diff --git a/codegen/wbp.zig b/codegen/wbp.zig index b1ed56a..dfdc32e 100644 --- a/codegen/wbp.zig +++ b/codegen/wbp.zig @@ -51,14 +51,10 @@ pub fn main() anyerror!void { var flat_map = std.AutoHashMap(u21, u5).init(allocator); defer flat_map.deinit(); - var line_buf: [4096]u8 = undefined; - // Process HangulSyllableType.txt - var in_file = try std.fs.cwd().openFile("data/unicode/auxiliary/WordBreakProperty.txt", .{}); - defer in_file.close(); - var in_reader = in_file.reader(&line_buf); - - while (in_reader.interface.takeDelimiterExclusive('\n')) |line| { + const in_file = @embedFile("WordBreakProperty.txt"); + var in_reader = std.io.Reader.fixed(in_file); + while (in_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; -- cgit v1.2.3