From ba5d9081b479e95ffa7f3baf751beedd370cec14 Mon Sep 17 00:00:00 2001
From: Sam Atman
Date: Wed, 4 Feb 2026 18:01:36 -0500
Subject: Normalization and case folding

Both of which deserve some further attention.
---
 codegen/fold.zig | 57 ++++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 38 insertions(+), 19 deletions(-)

(limited to 'codegen/fold.zig')

diff --git a/codegen/fold.zig b/codegen/fold.zig
index 366ed79..c5f54eb 100644
--- a/codegen/fold.zig
+++ b/codegen/fold.zig
@@ -228,26 +228,45 @@ pub fn main() anyerror!void {
     var out_file = try std.fs.cwd().createFile(output_path, .{});
     defer out_file.close();
     var writer = out_file.writer(&write_buf);
-
-    const endian = builtin.cpu.arch.endian();
     // Table metadata.
-    try writer.interface.writeInt(u24, @intCast(codepoint_cutoff), endian);
-    try writer.interface.writeInt(u24, @intCast(multiple_codepoint_start), endian);
-    // Stage 1
-    try writer.interface.writeInt(u16, @intCast(meaningful_stage1.len), endian);
-    try writer.interface.writeAll(meaningful_stage1);
-    // Stage 2
-    try writer.interface.writeInt(u16, @intCast(stage2.len), endian);
-    try writer.interface.writeAll(stage2);
-    // Stage 3
-    try writer.interface.writeInt(u16, @intCast(stage3.len), endian);
-    for (stage3) |offset| try writer.interface.writeInt(i24, offset, endian);
-    // Changes when case folded
-    // Min and max
-    try writer.interface.writeInt(u24, std.mem.min(u21, changes_when_casefolded_exceptions.items), endian);
-    try writer.interface.writeInt(u24, std.mem.max(u21, changes_when_casefolded_exceptions.items), endian);
-    try writer.interface.writeInt(u16, @intCast(changes_when_casefolded_exceptions.items.len), endian);
-    for (changes_when_casefolded_exceptions.items) |cp| try writer.interface.writeInt(u24, cp, endian);
+    try writer.interface.print(
+        \\//! This file is auto-generated. Do not edit.
+        \\
+        \\pub const cutoff: u21 = {};
+        \\pub const cwcf_exceptions_min: u21 = {};
+        \\pub const cwcf_exceptions_max: u21 = {};
+        \\pub const cwcf_exceptions: [{}]u21 = .{{
+    , .{ codepoint_cutoff, std.mem.min(u21, changes_when_casefolded_exceptions.items), std.mem.max(u21, changes_when_casefolded_exceptions.items), changes_when_casefolded_exceptions.items.len });
+    for (changes_when_casefolded_exceptions.items) |cp| try writer.interface.print("{}, ", .{cp});
+
+    try writer.interface.print(
+        \\
+        \\}};
+        \\
+        \\pub const multiple_start: u21 = {};
+        \\pub const stage1: [{}]u8 = .{{
+    , .{ multiple_codepoint_start, meaningful_stage1.len });
+    for (meaningful_stage1) |entry| try writer.interface.print("{}, ", .{entry});
+
+    try writer.interface.print(
+        \\
+        \\}};
+        \\
+        \\pub const stage2: [{}]u8 = .{{
+    , .{stage2.len});
+    for (stage2) |entry| try writer.interface.print("{}, ", .{entry});
+
+    try writer.interface.print(
+        \\
+        \\}};
+        \\
+        \\pub const stage3: [{}]i24 = .{{
+    , .{stage3.len});
+    for (stage3) |entry| try writer.interface.print("{}, ", .{entry});
+
+    try writer.interface.writeAll(
+        \\};
+    );
 
     try writer.interface.flush();
 }
--
cgit v1.2.3