From 749197a3f9d25e211615960c02380a3d659b20f9 Mon Sep 17 00:00:00 2001 From: Michael Chaten Date: Sun, 14 Sep 2025 04:11:09 -0700 Subject: Embed data files in scripts rather than relying on filesystem access for easier packaging --- build.zig | 25 +++++++++++++++++++++++++ codegen/canon.zig | 8 ++------ codegen/case_prop.zig | 9 ++------- codegen/ccc.zig | 9 ++------- codegen/compat.zig | 7 ++----- codegen/core_props.zig | 9 ++------- codegen/dwp.zig | 14 ++++---------- codegen/fold.zig | 13 ++++--------- codegen/gbp.zig | 23 +++++++++-------------- codegen/gencat.zig | 9 ++------- codegen/hangul.zig | 9 ++------- codegen/lower.zig | 8 ++------ codegen/normp.zig | 9 ++------- codegen/numeric.zig | 9 ++------- codegen/props.zig | 9 ++------- codegen/scripts.zig | 11 +++-------- codegen/upper.zig | 11 ++--------- codegen/wbp.zig | 10 +++------- src/unicode_tests.zig | 23 ++++++----------------- 19 files changed, 78 insertions(+), 147 deletions(-) diff --git a/build.zig b/build.zig index 67a009a..5678cd1 100644 --- a/build.zig +++ b/build.zig @@ -48,6 +48,9 @@ pub fn build(b: *std.Build) void { .optimize = .Debug, }), }); + gbp_gen_exe.root_module.addAnonymousImport("DerivedCoreProperties.txt", .{ .root_source_file = b.path("data/unicode/DerivedCoreProperties.txt") }); + gbp_gen_exe.root_module.addAnonymousImport("GraphemeBreakProperty.txt", .{ .root_source_file = b.path("data/unicode/auxiliary/GraphemeBreakProperty.txt") }); + gbp_gen_exe.root_module.addAnonymousImport("emoji-data.txt", .{ .root_source_file = b.path("data/unicode/emoji/emoji-data.txt") }); const run_gbp_gen_exe = b.addRunArtifact(gbp_gen_exe); const gbp_gen_out = run_gbp_gen_exe.addOutputFileArg("gbp.bin.z"); @@ -59,6 +62,7 @@ pub fn build(b: *std.Build) void { .optimize = .Debug, }), }); + wbp_gen_exe.root_module.addAnonymousImport("WordBreakProperty.txt", .{ .root_source_file = b.path("data/unicode/auxiliary/WordBreakProperty.txt") }); const run_wbp_gen_exe = b.addRunArtifact(wbp_gen_exe); const wbp_gen_out = run_wbp_gen_exe.addOutputFileArg("wbp.bin.z"); @@ -70,6 +74,8 @@ pub fn build(b: *std.Build) void { .optimize = .Debug, }), }); + dwp_gen_exe.root_module.addAnonymousImport("DerivedEastAsianWidth.txt", .{ .root_source_file = b.path("data/unicode/extracted/DerivedEastAsianWidth.txt") }); + dwp_gen_exe.root_module.addAnonymousImport("DerivedGeneralCategory.txt", .{ .root_source_file = b.path("data/unicode/extracted/DerivedGeneralCategory.txt") }); dwp_gen_exe.root_module.addOptions("options", dwp_options); const run_dwp_gen_exe = b.addRunArtifact(dwp_gen_exe); const dwp_gen_out = run_dwp_gen_exe.addOutputFileArg("dwp.bin.z"); @@ -83,6 +89,7 @@ pub fn build(b: *std.Build) void { .optimize = .Debug, }), }); + canon_gen_exe.root_module.addAnonymousImport("UnicodeData.txt", .{ .root_source_file = b.path("data/unicode/UnicodeData.txt") }); const run_canon_gen_exe = b.addRunArtifact(canon_gen_exe); const canon_gen_out = run_canon_gen_exe.addOutputFileArg("canon.bin.z"); @@ -94,6 +101,7 @@ pub fn build(b: *std.Build) void { .optimize = .Debug, }), }); + compat_gen_exe.root_module.addAnonymousImport("UnicodeData.txt", .{ .root_source_file = b.path("data/unicode/UnicodeData.txt") }); const run_compat_gen_exe = b.addRunArtifact(compat_gen_exe); const compat_gen_out = run_compat_gen_exe.addOutputFileArg("compat.bin.z"); @@ -105,6 +113,7 @@ pub fn build(b: *std.Build) void { .optimize = .Debug, }), }); + hangul_gen_exe.root_module.addAnonymousImport("HangulSyllableType.txt", .{ .root_source_file = b.path("data/unicode/HangulSyllableType.txt") }); const run_hangul_gen_exe = b.addRunArtifact(hangul_gen_exe); const hangul_gen_out = run_hangul_gen_exe.addOutputFileArg("hangul.bin.z"); @@ -116,6 +125,7 @@ pub fn build(b: *std.Build) void { .optimize = .Debug, }), }); + normp_gen_exe.root_module.addAnonymousImport("DerivedNormalizationProps.txt", .{ .root_source_file = b.path("data/unicode/DerivedNormalizationProps.txt") }); const run_normp_gen_exe = b.addRunArtifact(normp_gen_exe); const normp_gen_out = run_normp_gen_exe.addOutputFileArg("normp.bin.z"); @@ -127,6 +137,7 @@ pub fn build(b: *std.Build) void { .optimize = .Debug, }), }); + ccc_gen_exe.root_module.addAnonymousImport("DerivedCombiningClass.txt", .{ .root_source_file = b.path("data/unicode/extracted/DerivedCombiningClass.txt") }); const run_ccc_gen_exe = b.addRunArtifact(ccc_gen_exe); const ccc_gen_out = run_ccc_gen_exe.addOutputFileArg("ccc.bin.z"); @@ -138,6 +149,7 @@ pub fn build(b: *std.Build) void { .optimize = .Debug, }), }); + gencat_gen_exe.root_module.addAnonymousImport("DerivedGeneralCategory.txt", .{ .root_source_file = b.path("data/unicode/extracted/DerivedGeneralCategory.txt") }); const run_gencat_gen_exe = b.addRunArtifact(gencat_gen_exe); const gencat_gen_out = run_gencat_gen_exe.addOutputFileArg("gencat.bin.z"); @@ -149,6 +161,8 @@ pub fn build(b: *std.Build) void { .optimize = .Debug, }), }); + fold_gen_exe.root_module.addAnonymousImport("DerivedCoreProperties.txt", .{ .root_source_file = b.path("data/unicode/DerivedCoreProperties.txt") }); + fold_gen_exe.root_module.addAnonymousImport("CaseFolding.txt", .{ .root_source_file = b.path("data/unicode/CaseFolding.txt") }); const run_fold_gen_exe = b.addRunArtifact(fold_gen_exe); const fold_gen_out = run_fold_gen_exe.addOutputFileArg("fold.bin.z"); @@ -161,6 +175,7 @@ pub fn build(b: *std.Build) void { .optimize = .Debug, }), }); + num_gen_exe.root_module.addAnonymousImport("DerivedNumericType.txt", .{ .root_source_file = b.path("data/unicode/extracted/DerivedNumericType.txt") }); const run_num_gen_exe = b.addRunArtifact(num_gen_exe); const num_gen_out = run_num_gen_exe.addOutputFileArg("numeric.bin.z"); @@ -173,6 +188,7 @@ pub fn build(b: *std.Build) void { .optimize = .Debug, }), }); + case_prop_gen_exe.root_module.addAnonymousImport("DerivedCoreProperties.txt", .{ .root_source_file = b.path("data/unicode/DerivedCoreProperties.txt") }); const run_case_prop_gen_exe = b.addRunArtifact(case_prop_gen_exe); const case_prop_gen_out = run_case_prop_gen_exe.addOutputFileArg("case_prop.bin.z"); @@ -185,6 +201,7 @@ pub fn build(b: *std.Build) void { .optimize = .Debug, }), }); + upper_gen_exe.root_module.addAnonymousImport("UnicodeData.txt", .{ .root_source_file = b.path("data/unicode/UnicodeData.txt") }); const run_upper_gen_exe = b.addRunArtifact(upper_gen_exe); const upper_gen_out = run_upper_gen_exe.addOutputFileArg("upper.bin.z"); @@ -197,6 +214,7 @@ pub fn build(b: *std.Build) void { .optimize = .Debug, }), }); + lower_gen_exe.root_module.addAnonymousImport("UnicodeData.txt", .{ .root_source_file = b.path("data/unicode/UnicodeData.txt") }); const run_lower_gen_exe = b.addRunArtifact(lower_gen_exe); const lower_gen_out = run_lower_gen_exe.addOutputFileArg("lower.bin.z"); @@ -208,6 +226,7 @@ pub fn build(b: *std.Build) void { .optimize = .Debug, }), }); + scripts_gen_exe.root_module.addAnonymousImport("Scripts.txt", .{ .root_source_file = b.path("data/unicode/Scripts.txt") }); const run_scripts_gen_exe = b.addRunArtifact(scripts_gen_exe); const scripts_gen_out = run_scripts_gen_exe.addOutputFileArg("scripts.bin.z"); @@ -219,6 +238,7 @@ pub fn build(b: *std.Build) void { .optimize = .Debug, }), }); + core_gen_exe.root_module.addAnonymousImport("DerivedCoreProperties.txt", .{ .root_source_file = b.path("data/unicode/DerivedCoreProperties.txt") }); const run_core_gen_exe = b.addRunArtifact(core_gen_exe); const core_gen_out = run_core_gen_exe.addOutputFileArg("core_props.bin.z"); @@ -230,6 +250,8 @@ pub fn build(b: *std.Build) void { .optimize = .Debug, }), }); + + props_gen_exe.root_module.addAnonymousImport("PropList.txt", .{ .root_source_file = b.path("data/unicode/PropList.txt") }); const run_props_gen_exe = b.addRunArtifact(props_gen_exe); const props_gen_out = run_props_gen_exe.addOutputFileArg("props.bin.z"); @@ -483,6 +505,9 @@ pub fn build(b: *std.Build) void { .optimize = optimize, }), }); + unicode_tests.root_module.addAnonymousImport("GraphemeBreakTest.txt", .{ .root_source_file = b.path("data/unicode/auxiliary/GraphemeBreakTest.txt") }); + unicode_tests.root_module.addAnonymousImport("NormalizationTest.txt", .{ .root_source_file = b.path("data/unicode/NormalizationTest.txt") }); + unicode_tests.root_module.addAnonymousImport("WordBreakTest.txt", .{ .root_source_file = b.path("data/unicode/auxiliary/WordBreakTest.txt") }); unicode_tests.root_module.addImport("Graphemes", graphemes); unicode_tests.root_module.addImport("Normalize", norm); unicode_tests.root_module.addImport("Words", words); diff --git a/codegen/canon.zig b/codegen/canon.zig index 89a9070..34e720f 100644 --- a/codegen/canon.zig +++ b/codegen/canon.zig @@ -6,13 +6,9 @@ pub fn main() anyerror!void { defer arena.deinit(); const allocator = arena.allocator(); - var line_buf: [4096]u8 = undefined; var write_buf: [4096]u8 = undefined; // Process UnicodeData.txt - var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{}); - defer in_file.close(); - var in_reader = in_file.reader(&line_buf); - + var in_reader = std.io.Reader.fixed(@embedFile("UnicodeData.txt")); var args_iter = try std.process.argsWithAllocator(allocator); defer args_iter.deinit(); _ = args_iter.skip(); @@ -24,7 +20,7 @@ pub fn main() anyerror!void { var writer = &file_writer.interface; const endian = builtin.cpu.arch.endian(); - lines: while (in_reader.interface.takeDelimiterInclusive('\n')) |line| { + lines: while (in_reader.takeDelimiterInclusive('\n')) |line| { if (line.len == 0) continue; var field_iter = std.mem.splitScalar(u8, line, ';'); diff --git a/codegen/case_prop.zig b/codegen/case_prop.zig index 66eb62c..46b8e65 100644 --- a/codegen/case_prop.zig +++ b/codegen/case_prop.zig @@ -30,14 +30,9 @@ pub fn main() anyerror!void { var flat_map = std.AutoHashMap(u21, u8).init(allocator); defer flat_map.deinit(); - var line_buf: [4096]u8 = undefined; - // Process DerivedCoreProperties.txt - var in_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{}); - defer in_file.close(); - var in_reader = in_file.reader(&line_buf); - - while (in_reader.interface.takeDelimiterExclusive('\n')) |line| { + var in_reader = std.io.Reader.fixed(@embedFile("DerivedCoreProperties.txt")); + while (in_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; diff --git a/codegen/ccc.zig b/codegen/ccc.zig index b9b2bc3..48d3a9d 100644 --- a/codegen/ccc.zig +++ b/codegen/ccc.zig @@ -29,14 +29,9 @@ pub fn main() anyerror!void { var flat_map = std.AutoHashMap(u21, u8).init(allocator); defer flat_map.deinit(); - var line_buf: [4096]u8 = undefined; - // Process DerivedCombiningClass.txt - var cc_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedCombiningClass.txt", .{}); - defer cc_file.close(); - var cc_reader = cc_file.reader(&line_buf); - - while (cc_reader.interface.takeDelimiterExclusive('\n')) |line| { + var cc_reader = std.io.Reader.fixed(@embedFile("DerivedCombiningClass.txt")); + while (cc_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; diff --git a/codegen/compat.zig b/codegen/compat.zig index 835a636..0a06c44 100644 --- a/codegen/compat.zig +++ b/codegen/compat.zig @@ -7,12 +7,9 @@ pub fn main() anyerror!void { const allocator = arena.allocator(); // Process UnicodeData.txt - var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{}); - defer in_file.close(); - var line_buf: [4096]u8 = undefined; var write_buf: [4096]u8 = undefined; - var in_reader = in_file.reader(&line_buf); + var in_reader = std.io.Reader.fixed(@embedFile("UnicodeData.txt")); var args_iter = try std.process.argsWithAllocator(allocator); defer args_iter.deinit(); _ = args_iter.skip(); @@ -24,7 +21,7 @@ pub fn main() anyerror!void { const endian = builtin.cpu.arch.endian(); - lines: while (in_reader.interface.takeDelimiterExclusive('\n')) |line| { + lines: while (in_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0) continue; var field_iter = std.mem.splitScalar(u8, line, ';'); diff --git a/codegen/core_props.zig b/codegen/core_props.zig index 19063f6..d59a77e 100644 --- a/codegen/core_props.zig +++ b/codegen/core_props.zig @@ -30,14 +30,9 @@ pub fn main() anyerror!void { var flat_map = std.AutoHashMap(u21, u8).init(allocator); defer flat_map.deinit(); - var line_buf: [4096]u8 = undefined; - // Process DerivedCoreProperties.txt - var in_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{}); - defer in_file.close(); - var in_reader = in_file.reader(&line_buf); - - while (in_reader.interface.takeDelimiterExclusive('\n')) |line| { + var in_reader = std.io.Reader.fixed(@embedFile("DerivedCoreProperties.txt")); + while (in_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; diff --git a/codegen/dwp.zig b/codegen/dwp.zig index fad08d1..8189ad8 100644 --- a/codegen/dwp.zig +++ b/codegen/dwp.zig @@ -31,14 +31,10 @@ pub fn main() anyerror!void { var flat_map = std.AutoHashMap(u21, i4).init(allocator); defer flat_map.deinit(); - var line_buf: [4096]u8 = undefined; - // Process DerivedEastAsianWidth.txt - var deaw_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedEastAsianWidth.txt", .{}); - defer deaw_file.close(); - var deaw_reader = deaw_file.reader(&line_buf); + var deaw_reader = std.io.Reader.fixed(@embedFile("DerivedEastAsianWidth.txt")); - while (deaw_reader.interface.takeDelimiterExclusive('\n')) |line| { + while (deaw_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0) continue; // @missing ranges @@ -94,11 +90,9 @@ pub fn main() anyerror!void { }, } // Process DerivedGeneralCategory.txt - var dgc_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedGeneralCategory.txt", .{}); - defer dgc_file.close(); - var dgc_reader = dgc_file.reader(&line_buf); + var dgc_reader = std.io.Reader.fixed(@embedFile("DerivedGeneralCategory.txt")); - while (dgc_reader.interface.takeDelimiterExclusive('\n')) |line| { + while (dgc_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; diff --git a/codegen/fold.zig b/codegen/fold.zig index cc381a8..d927795 100644 --- a/codegen/fold.zig +++ b/codegen/fold.zig @@ -7,16 +7,13 @@ pub fn main() anyerror!void { defer std.debug.assert(gpa.deinit() == .ok); const allocator = gpa.allocator(); - var line_buf: [4096]u8 = undefined; // Process DerivedCoreProperties.txt - var props_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{}); - defer props_file.close(); - var props_reader = props_file.reader(&line_buf); + var props_reader = std.io.Reader.fixed(@embedFile("DerivedCoreProperties.txt")); var props_map = std.AutoHashMap(u21, void).init(allocator); defer props_map.deinit(); - props_lines: while (props_reader.interface.takeDelimiterExclusive('\n')) |line| { + props_lines: while (props_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; @@ -57,11 +54,9 @@ pub fn main() anyerror!void { defer codepoint_mapping.deinit(); // Process CaseFolding.txt - var cp_file = try std.fs.cwd().openFile("data/unicode/CaseFolding.txt", .{}); - defer cp_file.close(); - var cp_reader = cp_file.reader(&line_buf); - while (cp_reader.interface.takeDelimiterExclusive('\n')) |line| { + var cp_reader = std.io.Reader.fixed(@embedFile("CaseFolding.txt")); + while (cp_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; var field_it = std.mem.splitScalar(u8, line, ';'); diff --git a/codegen/gbp.zig b/codegen/gbp.zig index d654cf1..895aa7a 100644 --- a/codegen/gbp.zig +++ b/codegen/gbp.zig @@ -61,14 +61,11 @@ pub fn main() anyerror!void { var emoji_set = std.AutoHashMap(u21, void).init(allocator); defer emoji_set.deinit(); - var line_buf: [4096]u8 = undefined; - // Process Indic - var indic_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{}); - defer indic_file.close(); - var indic_reader = indic_file.reader(&line_buf); + const indic_file = @embedFile("DerivedCoreProperties.txt"); + var indic_reader = std.io.Reader.fixed(indic_file); - while (indic_reader.interface.takeDelimiterExclusive('\n')) |line| { + while (indic_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; if (std.mem.indexOf(u8, line, "InCB") == null) continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; @@ -106,11 +103,10 @@ pub fn main() anyerror!void { }, } // Process GBP - var gbp_file = try std.fs.cwd().openFile("data/unicode/auxiliary/GraphemeBreakProperty.txt", .{}); - defer gbp_file.close(); - var gbp_reader = gbp_file.reader(&line_buf); - while (gbp_reader.interface.takeDelimiterExclusive('\n')) |line| { + var gbp_reader = std.io.Reader.fixed(@embedFile("GraphemeBreakProperty.txt")); + + while (gbp_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; @@ -147,11 +143,10 @@ pub fn main() anyerror!void { }, } // Process Emoji - var emoji_file = try std.fs.cwd().openFile("data/unicode/emoji/emoji-data.txt", .{}); - defer emoji_file.close(); - var emoji_reader = emoji_file.reader(&line_buf); - while (emoji_reader.interface.takeDelimiterExclusive('\n')) |line| { + var emoji_reader = std.io.Reader.fixed(@embedFile("emoji-data.txt")); + + while (emoji_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; if (std.mem.indexOf(u8, line, "Extended_Pictographic") == null) continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; diff --git a/codegen/gencat.zig b/codegen/gencat.zig index 7dedb5d..79fa072 100644 --- a/codegen/gencat.zig +++ b/codegen/gencat.zig @@ -62,14 +62,9 @@ pub fn main() !void { var flat_map = std.AutoHashMap(u21, u5).init(allocator); defer flat_map.deinit(); - var line_buf: [4096]u8 = undefined; - // Process DerivedGeneralCategory.txt - var in_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedGeneralCategory.txt", .{}); - defer in_file.close(); - var in_reader = in_file.reader(&line_buf); - - while (in_reader.interface.takeDelimiterExclusive('\n')) |line| { + var in_reader = std.io.Reader.fixed(@embedFile("DerivedGeneralCategory.txt")); + while (in_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; diff --git a/codegen/hangul.zig b/codegen/hangul.zig index 6768b3f..64ab11c 100644 --- a/codegen/hangul.zig +++ b/codegen/hangul.zig @@ -38,14 +38,9 @@ pub fn main() anyerror!void { var flat_map = std.AutoHashMap(u21, u3).init(allocator); defer flat_map.deinit(); - var line_buf: [4096]u8 = undefined; - // Process HangulSyllableType.txt - var in_file = try std.fs.cwd().openFile("data/unicode/HangulSyllableType.txt", .{}); - defer in_file.close(); - var in_reader = in_file.reader(&line_buf); - - while (in_reader.interface.takeDelimiterExclusive('\n')) |line| { + var in_reader = std.io.Reader.fixed(@embedFile("HangulSyllableType.txt")); + while (in_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; diff --git a/codegen/lower.zig b/codegen/lower.zig index c11cb0a..987f004 100644 --- a/codegen/lower.zig +++ b/codegen/lower.zig @@ -6,13 +6,9 @@ pub fn main() !void { defer arena.deinit(); const allocator = arena.allocator(); - var line_buf: [4096]u8 = undefined; var write_buf: [4096]u8 = undefined; // Process UnicodeData.txt - var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{}); - defer in_file.close(); - var in_reader = in_file.reader(&line_buf); - + var in_reader = std.io.Reader.fixed(@embedFile("UnicodeData.txt")); var args_iter = try std.process.argsWithAllocator(allocator); defer args_iter.deinit(); _ = args_iter.skip(); @@ -24,7 +20,7 @@ pub fn main() !void { const endian = builtin.cpu.arch.endian(); - lines: while (in_reader.interface.takeDelimiterExclusive('\n')) |line| { + lines: while (in_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0) continue; var field_iter = std.mem.splitScalar(u8, line, ';'); diff --git a/codegen/normp.zig b/codegen/normp.zig index 71a1ae5..3cdd770 100644 --- a/codegen/normp.zig +++ b/codegen/normp.zig @@ -29,14 +29,9 @@ pub fn main() anyerror!void { var flat_map = std.AutoHashMap(u21, u3).init(allocator); defer flat_map.deinit(); - var line_buf: [4096]u8 = undefined; - // Process DerivedNormalizationProps.txt - var in_file = try std.fs.cwd().openFile("data/unicode/DerivedNormalizationProps.txt", .{}); - defer in_file.close(); - var in_reader = in_file.reader(&line_buf); - - while (in_reader.interface.takeDelimiterExclusive('\n')) |line| { + var in_reader = std.io.Reader.fixed(@embedFile("DerivedNormalizationProps.txt")); + while (in_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; diff --git a/codegen/numeric.zig b/codegen/numeric.zig index cf918e8..d6b3165 100644 --- a/codegen/numeric.zig +++ b/codegen/numeric.zig @@ -30,14 +30,9 @@ pub fn main() anyerror!void { var flat_map = std.AutoHashMap(u21, u8).init(allocator); defer flat_map.deinit(); - var line_buf: [4096]u8 = undefined; - // Process DerivedNumericType.txt - var in_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedNumericType.txt", .{}); - defer in_file.close(); - var in_reader = in_file.reader(&line_buf); - - while (in_reader.interface.takeDelimiterExclusive('\n')) |line| { + var in_reader = std.io.Reader.fixed(@embedFile("DerivedNumericType.txt")); + while (in_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; diff --git a/codegen/props.zig b/codegen/props.zig index 6ff0a33..ca42987 100644 --- a/codegen/props.zig +++ b/codegen/props.zig @@ -30,14 +30,9 @@ pub fn main() anyerror!void { var flat_map = std.AutoHashMap(u21, u8).init(allocator); defer flat_map.deinit(); - var line_buf: [4096]u8 = undefined; - // Process PropList.txt - var in_file = try std.fs.cwd().openFile("data/unicode/PropList.txt", .{}); - defer in_file.close(); - var in_reader = in_file.reader(&line_buf); - - while (in_reader.interface.takeDelimiterExclusive('\n')) |line| { + var in_reader = std.io.Reader.fixed(@embedFile("PropList.txt")); + while (in_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; diff --git a/codegen/scripts.zig b/codegen/scripts.zig index bdd3d9d..81511cc 100644 --- a/codegen/scripts.zig +++ b/codegen/scripts.zig @@ -203,14 +203,9 @@ pub fn main() anyerror!void { var flat_map = std.AutoHashMap(u21, u8).init(allocator); defer flat_map.deinit(); - var line_buf: [4096]u8 = undefined; - - // Process DerivedGeneralCategory.txt - var in_file = try std.fs.cwd().openFile("data/unicode/Scripts.txt", .{}); - defer in_file.close(); - var in_reader = in_file.reader(&line_buf); - - while (in_reader.interface.takeDelimiterExclusive('\n')) |line| { + // Process Scripts.txt + var in_reader = std.io.Reader.fixed(@embedFile("Scripts.txt")); + while (in_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; diff --git a/codegen/upper.zig b/codegen/upper.zig index 7d96a0e..108fdbd 100644 --- a/codegen/upper.zig +++ b/codegen/upper.zig @@ -6,13 +6,9 @@ pub fn main() anyerror!void { defer arena.deinit(); const allocator = arena.allocator(); - var line_buf: [4096]u8 = undefined; var write_buf: [4096]u8 = undefined; // Process UnicodeData.txt - var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{}); - defer in_file.close(); - var in_reader = in_file.reader(&line_buf); - + var in_reader = std.io.Reader.fixed(@embedFile("UnicodeData.txt")); var args_iter = try std.process.argsWithAllocator(allocator); defer args_iter.deinit(); _ = args_iter.skip(); @@ -23,7 +19,7 @@ pub fn main() anyerror!void { var file_writer = out_file.writer(&write_buf); const endian = builtin.cpu.arch.endian(); - lines: while (in_reader.interface.takeDelimiterExclusive('\n')) |line| { + lines: while (in_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0) continue; var field_iter = std.mem.splitScalar(u8, line, ';'); @@ -48,9 +44,6 @@ pub fn main() anyerror!void { } } } else |err| switch (err) { - error.ReadFailed => { - return in_reader.err orelse err; - }, error.EndOfStream => {}, else => { return err; diff --git a/codegen/wbp.zig b/codegen/wbp.zig index b1ed56a..dfdc32e 100644 --- a/codegen/wbp.zig +++ b/codegen/wbp.zig @@ -51,14 +51,10 @@ pub fn main() anyerror!void { var flat_map = std.AutoHashMap(u21, u5).init(allocator); defer flat_map.deinit(); - var line_buf: [4096]u8 = undefined; - // Process HangulSyllableType.txt - var in_file = try std.fs.cwd().openFile("data/unicode/auxiliary/WordBreakProperty.txt", .{}); - defer in_file.close(); - var in_reader = in_file.reader(&line_buf); - - while (in_reader.interface.takeDelimiterExclusive('\n')) |line| { + const in_file = @embedFile("WordBreakProperty.txt"); + var in_reader = std.io.Reader.fixed(in_file); + while (in_reader.takeDelimiterExclusive('\n')) |line| { if (line.len == 0 or line[0] == '#') continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig index ff49b2a..875c5f0 100644 --- a/src/unicode_tests.zig +++ b/src/unicode_tests.zig @@ -8,14 +8,10 @@ test "Unicode normalization tests" { const n = try Normalize.init(allocator); defer n.deinit(allocator); - var file = try fs.cwd().openFile("data/unicode/NormalizationTest.txt", .{}); - defer file.close(); - var buf: [4096]u8 = undefined; - var reader = file.reader(&buf); - + var reader = std.io.Reader.fixed(@embedFile("NormalizationTest.txt")); var cp_buf: [4]u8 = undefined; - var line_iter: IterRead = .{ .read = &reader.interface }; + var line_iter: IterRead = .{ .read = &reader }; while (line_iter.next()) |line| { // Iterate over fields. @@ -120,15 +116,12 @@ test "Unicode normalization tests" { test "Segmentation GraphemeIterator" { const allocator = std.testing.allocator; - var file = try std.fs.cwd().openFile("data/unicode/auxiliary/GraphemeBreakTest.txt", .{}); - defer file.close(); - var buf: [4096]u8 = undefined; - var reader = file.reader(&buf); + var reader = std.io.Reader.fixed(@embedFile("GraphemeBreakTest.txt")); const graph = try Graphemes.init(allocator); defer graph.deinit(allocator); - var line_iter: IterRead = .{ .read = &reader.interface }; + var line_iter: IterRead = .{ .read = &reader }; while (line_iter.next()) |raw| { // Clean up. @@ -263,15 +256,11 @@ test "Segmentation GraphemeIterator" { test "Segmentation Word Iterator" { const allocator = std.testing.allocator; - var file = try std.fs.cwd().openFile("data/unicode/auxiliary/WordBreakTest.txt", .{}); - defer file.close(); - var buf: [4096]u8 = undefined; - var reader = file.reader(&buf); - + var reader = std.io.Reader.fixed(@embedFile("WordBreakTest.txt")); const wb = try Words.init(allocator); defer wb.deinit(allocator); - var line_iter: IterRead = .{ .read = &reader.interface }; + var line_iter: IterRead = .{ .read = &reader }; while (line_iter.next()) |raw| { // Clean up. -- cgit v1.2.3