summary | refs | log | tree | commit | diff
path: root/codegen/gbp.zig
diff options
context:
space:
mode:
author: Sam Atman, 2025-12-23 09:34:19 -0500
committer: Sam Atman, 2025-12-23 09:34:19 -0500
commit: 79b133e5d88fe6cfce337dd401fc09999db08852 (patch)
tree: 8b3f9062edde82724c73147abf42143a885640fc /codegen/gbp.zig
parent: Merge branch 'develop-next' (diff)
parent: Use takeDelimiterInclusive to support Zig 0.15.2 (diff)
download: zg-79b133e5d88fe6cfce337dd401fc09999db08852.tar.gz
zg-79b133e5d88fe6cfce337dd401fc09999db08852.tar.xz
zg-79b133e5d88fe6cfce337dd401fc09999db08852.zip
Merge branch 'fifteen-two'
Close #90 Close #87 Close #83 Thanks everyone.
Diffstat (limited to 'codegen/gbp.zig')
-rw-r--r-- codegen/gbp.zig 71
1 files changed, 39 insertions, 32 deletions
diff --git a/codegen/gbp.zig b/codegen/gbp.zig
index 3fc4461..1d06e9a 100644
--- a/codegen/gbp.zig
+++ b/codegen/gbp.zig
@@ -47,7 +47,7 @@ const BlockMap = std.HashMap(
47 std.hash_map.default_max_load_percentage, 47 std.hash_map.default_max_load_percentage,
48); 48);
49 49
50pub fn main() !void { 50pub fn main() anyerror!void {
51 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); 51 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
52 defer arena.deinit(); 52 defer arena.deinit();
53 const allocator = arena.allocator(); 53 const allocator = arena.allocator();
@@ -61,15 +61,12 @@ pub fn main() !void {
61 var emoji_set = std.AutoHashMap(u21, void).init(allocator); 61 var emoji_set = std.AutoHashMap(u21, void).init(allocator);
62 defer emoji_set.deinit(); 62 defer emoji_set.deinit();
63 63
64 var line_buf: [4096]u8 = undefined;
65
66 // Process Indic 64 // Process Indic
67 var indic_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{}); 65 const indic_file = @embedFile("DerivedCoreProperties.txt");
68 defer indic_file.close(); 66 var indic_reader = std.io.Reader.fixed(indic_file);
69 var indic_buf = std.io.bufferedReader(indic_file.reader());
70 const indic_reader = indic_buf.reader();
71 67
72 while (try indic_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { 68 while (indic_reader.takeDelimiterInclusive('\n')) |took| {
69 const line = std.mem.trimRight(u8, took, "\n");
73 if (line.len == 0 or line[0] == '#') continue; 70 if (line.len == 0 or line[0] == '#') continue;
74 if (std.mem.indexOf(u8, line, "InCB") == null) continue; 71 if (std.mem.indexOf(u8, line, "InCB") == null) continue;
75 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; 72 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
@@ -100,15 +97,18 @@ pub fn main() !void {
100 else => {}, 97 else => {},
101 } 98 }
102 } 99 }
100 } else |err| switch (err) {
101 error.EndOfStream => {},
102 else => {
103 return err;
104 },
103 } 105 }
104
105 // Process GBP 106 // Process GBP
106 var gbp_file = try std.fs.cwd().openFile("data/unicode/auxiliary/GraphemeBreakProperty.txt", .{});
107 defer gbp_file.close();
108 var gbp_buf = std.io.bufferedReader(gbp_file.reader());
109 const gbp_reader = gbp_buf.reader();
110 107
111 while (try gbp_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { 108 var gbp_reader = std.io.Reader.fixed(@embedFile("GraphemeBreakProperty.txt"));
109
110 while (gbp_reader.takeDelimiterInclusive('\n')) |took| {
111 const line = std.mem.trimRight(u8, took, "\n");
112 if (line.len == 0 or line[0] == '#') continue; 112 if (line.len == 0 or line[0] == '#') continue;
113 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; 113 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
114 114
@@ -138,15 +138,18 @@ pub fn main() !void {
138 else => {}, 138 else => {},
139 } 139 }
140 } 140 }
141 } else |err| switch (err) {
142 error.EndOfStream => {},
143 else => {
144 return err;
145 },
141 } 146 }
142
143 // Process Emoji 147 // Process Emoji
144 var emoji_file = try std.fs.cwd().openFile("data/unicode/emoji/emoji-data.txt", .{});
145 defer emoji_file.close();
146 var emoji_buf = std.io.bufferedReader(emoji_file.reader());
147 const emoji_reader = emoji_buf.reader();
148 148
149 while (try emoji_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { 149 var emoji_reader = std.io.Reader.fixed(@embedFile("emoji-data.txt"));
150
151 while (emoji_reader.takeDelimiterInclusive('\n')) |took| {
152 const line = std.mem.trimRight(u8, took, "\n");
150 if (line.len == 0 or line[0] == '#') continue; 153 if (line.len == 0 or line[0] == '#') continue;
151 if (std.mem.indexOf(u8, line, "Extended_Pictographic") == null) continue; 154 if (std.mem.indexOf(u8, line, "Extended_Pictographic") == null) continue;
152 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; 155 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
@@ -170,15 +173,20 @@ pub fn main() !void {
170 else => {}, 173 else => {},
171 } 174 }
172 } 175 }
176 } else |err| switch (err) {
177 error.EndOfStream => {},
178 else => {
179 return err;
180 },
173 } 181 }
174 182
175 var blocks_map = BlockMap.init(allocator); 183 var blocks_map = BlockMap.init(allocator);
176 defer blocks_map.deinit(); 184 defer blocks_map.deinit();
177 185
178 var stage1 = std.ArrayList(u16).init(allocator); 186 var stage1 = std.array_list.Managed(u16).init(allocator);
179 defer stage1.deinit(); 187 defer stage1.deinit();
180 188
181 var stage2 = std.ArrayList(u16).init(allocator); 189 var stage2 = std.array_list.Managed(u16).init(allocator);
182 defer stage2.deinit(); 190 defer stage2.deinit();
183 191
184 var stage3 = std.AutoArrayHashMap(u8, u16).init(allocator); 192 var stage3 = std.AutoArrayHashMap(u8, u16).init(allocator);
@@ -227,22 +235,21 @@ pub fn main() !void {
227 _ = args_iter.skip(); 235 _ = args_iter.skip();
228 const output_path = args_iter.next() orelse @panic("No output file arg!"); 236 const output_path = args_iter.next() orelse @panic("No output file arg!");
229 237
230 const compressor = std.compress.flate.deflate.compressor; 238 var write_buf: [4096]u8 = undefined;
231 var out_file = try std.fs.cwd().createFile(output_path, .{}); 239 var out_file = try std.fs.cwd().createFile(output_path, .{});
232 defer out_file.close(); 240 defer out_file.close();
233 var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); 241 var writer = out_file.writer(&write_buf);
234 const writer = out_comp.writer();
235 242
236 const endian = builtin.cpu.arch.endian(); 243 const endian = builtin.cpu.arch.endian();
237 try writer.writeInt(u16, @intCast(stage1.items.len), endian); 244 try writer.interface.writeInt(u16, @intCast(stage1.items.len), endian);
238 for (stage1.items) |i| try writer.writeInt(u16, i, endian); 245 for (stage1.items) |i| try writer.interface.writeInt(u16, i, endian);
239 246
240 try writer.writeInt(u16, @intCast(stage2.items.len), endian); 247 try writer.interface.writeInt(u16, @intCast(stage2.items.len), endian);
241 for (stage2.items) |i| try writer.writeInt(u16, i, endian); 248 for (stage2.items) |i| try writer.interface.writeInt(u16, i, endian);
242 249
243 const props_bytes = stage3.keys(); 250 const props_bytes = stage3.keys();
244 try writer.writeInt(u16, @intCast(props_bytes.len), endian); 251 try writer.interface.writeInt(u16, @intCast(props_bytes.len), endian);
245 try writer.writeAll(props_bytes); 252 try writer.interface.writeAll(props_bytes);
246 253
247 try out_comp.flush(); 254 try writer.interface.flush();
248} 255}