diff options
Diffstat (limited to 'codegen/canon.zig')
| -rw-r--r-- | codegen/canon.zig | 68 |
1 files changed, 68 insertions, 0 deletions
diff --git a/codegen/canon.zig b/codegen/canon.zig new file mode 100644 index 0000000..9d72edd --- /dev/null +++ b/codegen/canon.zig | |||
| @@ -0,0 +1,68 @@ | |||
| 1 | const std = @import("std"); | ||
| 2 | const builtin = @import("builtin"); | ||
| 3 | |||
/// Generates the compressed canonical-decomposition table.
///
/// Reads `data/unicode/UnicodeData.txt` relative to the current working
/// directory and writes a deflate-compressed stream to the path given as the
/// first command-line argument (panics if that argument is missing).
///
/// Each emitted record is a `u8` count (2 or 3) followed by that many `u24`
/// code points: the source code point, then its one or two decomposition code
/// points. Integers are written in the endianness of the CPU this program was
/// built for. A `u16` zero is written after the last record (presumably the
/// end-of-data marker for the reader; confirm against the consumer).
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // Process UnicodeData.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    const compressor = std.compress.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression });
    defer out_comp.deinit();
    const writer = out_comp.writer();

    const endian = builtin.cpu.arch.endian();
    var line_buf: [4096]u8 = undefined;

    lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
        if (line.len == 0) continue;

        var field_iter = std.mem.splitScalar(u8, line, ';');
        // cps[0] is the source code point; cps[1..len] is its decomposition.
        var cps: [3]u24 = undefined;
        var len: u8 = 2;
        // Set only once field 5 has filled cps[1..len], so that a line with
        // too few fields never causes uninitialized code points to be written.
        var has_decomposition = false;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                // Field 0: the code point itself, in hexadecimal.
                0 => cps[0] = try std.fmt.parseInt(u24, field, 16),

                // Field 5: the decomposition mapping.
                5 => {
                    // Not canonical: empty, or prefixed with a `<tag>`.
                    if (field.len == 0 or field[0] == '<') continue :lines;
                    if (std.mem.indexOfScalar(u8, field, ' ')) |space| {
                        // Canonical pair: two code points separated by a space.
                        len = 3;
                        cps[1] = try std.fmt.parseInt(u24, field[0..space], 16);
                        cps[2] = try std.fmt.parseInt(u24, field[space + 1 ..], 16);
                    } else {
                        // Singleton: a single replacement code point.
                        cps[1] = try std.fmt.parseInt(u24, field, 16);
                    }
                    has_decomposition = true;
                },

                else => {},
            }
        }

        // Line ended before the decomposition field; nothing valid to emit.
        if (!has_decomposition) continue;

        try writer.writeInt(u8, len, endian);
        for (cps[0..len]) |cp| try writer.writeInt(u24, cp, endian);
    }

    // Trailing zero written after the last record.
    try writer.writeInt(u16, 0, endian);
    try out_comp.flush();
}