diff options
| author | 2025-12-23 09:34:19 -0500 | |
|---|---|---|
| committer | 2025-12-23 09:34:19 -0500 | |
| commit | 79b133e5d88fe6cfce337dd401fc09999db08852 (patch) | |
| tree | 8b3f9062edde82724c73147abf42143a885640fc /codegen/gbp.zig | |
| parent | Merge branch 'develop-next' (diff) | |
| parent | Use takeDelimiterInclusive to support Zig 0.15.2 (diff) | |
| download | zg-79b133e5d88fe6cfce337dd401fc09999db08852.tar.gz zg-79b133e5d88fe6cfce337dd401fc09999db08852.tar.xz zg-79b133e5d88fe6cfce337dd401fc09999db08852.zip | |
Merge branch 'fifteen-two'
Close #90
Close #87
Close #83
Thanks everyone.
Diffstat (limited to 'codegen/gbp.zig')
| -rw-r--r-- | codegen/gbp.zig | 71 |
1 file changed, 39 insertions, 32 deletions
diff --git a/codegen/gbp.zig b/codegen/gbp.zig index 3fc4461..1d06e9a 100644 --- a/codegen/gbp.zig +++ b/codegen/gbp.zig | |||
| @@ -47,7 +47,7 @@ const BlockMap = std.HashMap( | |||
| 47 | std.hash_map.default_max_load_percentage, | 47 | std.hash_map.default_max_load_percentage, |
| 48 | ); | 48 | ); |
| 49 | 49 | ||
| 50 | pub fn main() !void { | 50 | pub fn main() anyerror!void { |
| 51 | var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); | 51 | var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); |
| 52 | defer arena.deinit(); | 52 | defer arena.deinit(); |
| 53 | const allocator = arena.allocator(); | 53 | const allocator = arena.allocator(); |
| @@ -61,15 +61,12 @@ pub fn main() !void { | |||
| 61 | var emoji_set = std.AutoHashMap(u21, void).init(allocator); | 61 | var emoji_set = std.AutoHashMap(u21, void).init(allocator); |
| 62 | defer emoji_set.deinit(); | 62 | defer emoji_set.deinit(); |
| 63 | 63 | ||
| 64 | var line_buf: [4096]u8 = undefined; | ||
| 65 | |||
| 66 | // Process Indic | 64 | // Process Indic |
| 67 | var indic_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{}); | 65 | const indic_file = @embedFile("DerivedCoreProperties.txt"); |
| 68 | defer indic_file.close(); | 66 | var indic_reader = std.io.Reader.fixed(indic_file); |
| 69 | var indic_buf = std.io.bufferedReader(indic_file.reader()); | ||
| 70 | const indic_reader = indic_buf.reader(); | ||
| 71 | 67 | ||
| 72 | while (try indic_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { | 68 | while (indic_reader.takeDelimiterInclusive('\n')) |took| { |
| 69 | const line = std.mem.trimRight(u8, took, "\n"); | ||
| 73 | if (line.len == 0 or line[0] == '#') continue; | 70 | if (line.len == 0 or line[0] == '#') continue; |
| 74 | if (std.mem.indexOf(u8, line, "InCB") == null) continue; | 71 | if (std.mem.indexOf(u8, line, "InCB") == null) continue; |
| 75 | const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; | 72 | const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; |
| @@ -100,15 +97,18 @@ pub fn main() !void { | |||
| 100 | else => {}, | 97 | else => {}, |
| 101 | } | 98 | } |
| 102 | } | 99 | } |
| 100 | } else |err| switch (err) { | ||
| 101 | error.EndOfStream => {}, | ||
| 102 | else => { | ||
| 103 | return err; | ||
| 104 | }, | ||
| 103 | } | 105 | } |
| 104 | |||
| 105 | // Process GBP | 106 | // Process GBP |
| 106 | var gbp_file = try std.fs.cwd().openFile("data/unicode/auxiliary/GraphemeBreakProperty.txt", .{}); | ||
| 107 | defer gbp_file.close(); | ||
| 108 | var gbp_buf = std.io.bufferedReader(gbp_file.reader()); | ||
| 109 | const gbp_reader = gbp_buf.reader(); | ||
| 110 | 107 | ||
| 111 | while (try gbp_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { | 108 | var gbp_reader = std.io.Reader.fixed(@embedFile("GraphemeBreakProperty.txt")); |
| 109 | |||
| 110 | while (gbp_reader.takeDelimiterInclusive('\n')) |took| { | ||
| 111 | const line = std.mem.trimRight(u8, took, "\n"); | ||
| 112 | if (line.len == 0 or line[0] == '#') continue; | 112 | if (line.len == 0 or line[0] == '#') continue; |
| 113 | const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; | 113 | const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; |
| 114 | 114 | ||
| @@ -138,15 +138,18 @@ pub fn main() !void { | |||
| 138 | else => {}, | 138 | else => {}, |
| 139 | } | 139 | } |
| 140 | } | 140 | } |
| 141 | } else |err| switch (err) { | ||
| 142 | error.EndOfStream => {}, | ||
| 143 | else => { | ||
| 144 | return err; | ||
| 145 | }, | ||
| 141 | } | 146 | } |
| 142 | |||
| 143 | // Process Emoji | 147 | // Process Emoji |
| 144 | var emoji_file = try std.fs.cwd().openFile("data/unicode/emoji/emoji-data.txt", .{}); | ||
| 145 | defer emoji_file.close(); | ||
| 146 | var emoji_buf = std.io.bufferedReader(emoji_file.reader()); | ||
| 147 | const emoji_reader = emoji_buf.reader(); | ||
| 148 | 148 | ||
| 149 | while (try emoji_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { | 149 | var emoji_reader = std.io.Reader.fixed(@embedFile("emoji-data.txt")); |
| 150 | |||
| 151 | while (emoji_reader.takeDelimiterInclusive('\n')) |took| { | ||
| 152 | const line = std.mem.trimRight(u8, took, "\n"); | ||
| 150 | if (line.len == 0 or line[0] == '#') continue; | 153 | if (line.len == 0 or line[0] == '#') continue; |
| 151 | if (std.mem.indexOf(u8, line, "Extended_Pictographic") == null) continue; | 154 | if (std.mem.indexOf(u8, line, "Extended_Pictographic") == null) continue; |
| 152 | const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; | 155 | const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; |
| @@ -170,15 +173,20 @@ pub fn main() !void { | |||
| 170 | else => {}, | 173 | else => {}, |
| 171 | } | 174 | } |
| 172 | } | 175 | } |
| 176 | } else |err| switch (err) { | ||
| 177 | error.EndOfStream => {}, | ||
| 178 | else => { | ||
| 179 | return err; | ||
| 180 | }, | ||
| 173 | } | 181 | } |
| 174 | 182 | ||
| 175 | var blocks_map = BlockMap.init(allocator); | 183 | var blocks_map = BlockMap.init(allocator); |
| 176 | defer blocks_map.deinit(); | 184 | defer blocks_map.deinit(); |
| 177 | 185 | ||
| 178 | var stage1 = std.ArrayList(u16).init(allocator); | 186 | var stage1 = std.array_list.Managed(u16).init(allocator); |
| 179 | defer stage1.deinit(); | 187 | defer stage1.deinit(); |
| 180 | 188 | ||
| 181 | var stage2 = std.ArrayList(u16).init(allocator); | 189 | var stage2 = std.array_list.Managed(u16).init(allocator); |
| 182 | defer stage2.deinit(); | 190 | defer stage2.deinit(); |
| 183 | 191 | ||
| 184 | var stage3 = std.AutoArrayHashMap(u8, u16).init(allocator); | 192 | var stage3 = std.AutoArrayHashMap(u8, u16).init(allocator); |
| @@ -227,22 +235,21 @@ pub fn main() !void { | |||
| 227 | _ = args_iter.skip(); | 235 | _ = args_iter.skip(); |
| 228 | const output_path = args_iter.next() orelse @panic("No output file arg!"); | 236 | const output_path = args_iter.next() orelse @panic("No output file arg!"); |
| 229 | 237 | ||
| 230 | const compressor = std.compress.flate.deflate.compressor; | 238 | var write_buf: [4096]u8 = undefined; |
| 231 | var out_file = try std.fs.cwd().createFile(output_path, .{}); | 239 | var out_file = try std.fs.cwd().createFile(output_path, .{}); |
| 232 | defer out_file.close(); | 240 | defer out_file.close(); |
| 233 | var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); | 241 | var writer = out_file.writer(&write_buf); |
| 234 | const writer = out_comp.writer(); | ||
| 235 | 242 | ||
| 236 | const endian = builtin.cpu.arch.endian(); | 243 | const endian = builtin.cpu.arch.endian(); |
| 237 | try writer.writeInt(u16, @intCast(stage1.items.len), endian); | 244 | try writer.interface.writeInt(u16, @intCast(stage1.items.len), endian); |
| 238 | for (stage1.items) |i| try writer.writeInt(u16, i, endian); | 245 | for (stage1.items) |i| try writer.interface.writeInt(u16, i, endian); |
| 239 | 246 | ||
| 240 | try writer.writeInt(u16, @intCast(stage2.items.len), endian); | 247 | try writer.interface.writeInt(u16, @intCast(stage2.items.len), endian); |
| 241 | for (stage2.items) |i| try writer.writeInt(u16, i, endian); | 248 | for (stage2.items) |i| try writer.interface.writeInt(u16, i, endian); |
| 242 | 249 | ||
| 243 | const props_bytes = stage3.keys(); | 250 | const props_bytes = stage3.keys(); |
| 244 | try writer.writeInt(u16, @intCast(props_bytes.len), endian); | 251 | try writer.interface.writeInt(u16, @intCast(props_bytes.len), endian); |
| 245 | try writer.writeAll(props_bytes); | 252 | try writer.interface.writeAll(props_bytes); |
| 246 | 253 | ||
| 247 | try out_comp.flush(); | 254 | try writer.interface.flush(); |
| 248 | } | 255 | } |