diff options
Diffstat (limited to 'codegen/fold.zig')
| -rw-r--r-- | codegen/fold.zig | 54 |
1 files changed, 51 insertions, 3 deletions
diff --git a/codegen/fold.zig b/codegen/fold.zig index 7977e61..b3192e7 100644 --- a/codegen/fold.zig +++ b/codegen/fold.zig | |||
| @@ -8,7 +8,51 @@ pub fn main() !void { | |||
| 8 | defer arena.deinit(); | 8 | defer arena.deinit(); |
| 9 | const allocator = arena.allocator(); | 9 | const allocator = arena.allocator(); |
| 10 | 10 | ||
| 11 | // Process DerivedEastAsianWidth.txt | 11 | // Process DerivedCoreProperties.txt |
| 12 | var cp_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{}); | ||
| 13 | defer cp_file.close(); | ||
| 14 | var cp_buf = std.io.bufferedReader(cp_file.reader()); | ||
| 15 | const cp_reader = cp_buf.reader(); | ||
| 16 | |||
| 17 | var cp_map = std.AutoHashMap(u21, void).init(allocator); | ||
| 18 | defer cp_map.deinit(); | ||
| 19 | |||
| 20 | var line_buf: [4096]u8 = undefined; | ||
| 21 | |||
| 22 | cp_lines: while (try cp_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { | ||
| 23 | if (line.len == 0 or line[0] == '#') continue; | ||
| 24 | |||
| 25 | const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; | ||
| 26 | |||
| 27 | var field_iter = std.mem.tokenizeAny(u8, no_comment, "; "); | ||
| 28 | var current_code: [2]u21 = undefined; | ||
| 29 | |||
| 30 | var i: usize = 0; | ||
| 31 | while (field_iter.next()) |field| : (i += 1) { | ||
| 32 | switch (i) { | ||
| 33 | 0 => { | ||
| 34 | // Code point(s) | ||
| 35 | if (std.mem.indexOf(u8, field, "..")) |dots| { | ||
| 36 | current_code = .{ | ||
| 37 | try std.fmt.parseInt(u21, field[0..dots], 16), | ||
| 38 | try std.fmt.parseInt(u21, field[dots + 2 ..], 16), | ||
| 39 | }; | ||
| 40 | } else { | ||
| 41 | const code = try std.fmt.parseInt(u21, field, 16); | ||
| 42 | current_code = .{ code, code }; | ||
| 43 | } | ||
| 44 | }, | ||
| 45 | 1 => { | ||
| 46 | // Core property | ||
| 47 | if (!mem.eql(u8, field, "Changes_When_Casefolded")) continue :cp_lines; | ||
| 48 | for (current_code[0]..current_code[1] + 1) |cp| try cp_map.put(@intCast(cp), {}); | ||
| 49 | }, | ||
| 50 | else => {}, | ||
| 51 | } | ||
| 52 | } | ||
| 53 | } | ||
| 54 | |||
| 55 | // Process CaseFolding.txt | ||
| 12 | var in_file = try std.fs.cwd().openFile("data/unicode/CaseFolding.txt", .{}); | 56 | var in_file = try std.fs.cwd().openFile("data/unicode/CaseFolding.txt", .{}); |
| 13 | defer in_file.close(); | 57 | defer in_file.close(); |
| 14 | var in_buf = std.io.bufferedReader(in_file.reader()); | 58 | var in_buf = std.io.bufferedReader(in_file.reader()); |
| @@ -27,7 +71,6 @@ pub fn main() !void { | |||
| 27 | const writer = out_comp.writer(); | 71 | const writer = out_comp.writer(); |
| 28 | 72 | ||
| 29 | const endian = builtin.cpu.arch.endian(); | 73 | const endian = builtin.cpu.arch.endian(); |
| 30 | var line_buf: [4096]u8 = undefined; | ||
| 31 | 74 | ||
| 32 | lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { | 75 | lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { |
| 33 | if (line.len == 0 or line[0] == '#') continue; | 76 | if (line.len == 0 or line[0] == '#') continue; |
| @@ -41,7 +84,12 @@ pub fn main() !void { | |||
| 41 | var i: usize = 0; | 84 | var i: usize = 0; |
| 42 | while (field_iter.next()) |field| : (i += 1) { | 85 | while (field_iter.next()) |field| : (i += 1) { |
| 43 | switch (i) { | 86 | switch (i) { |
| 44 | 0 => cps[0] = try fmt.parseInt(u24, field, 16), | 87 | 0 => { |
| 88 | var cp = try fmt.parseInt(u21, field, 16); | ||
| 89 | cp <<= 1; | ||
| 90 | if (cp_map.contains(cp)) cp |= 1; | ||
| 91 | cps[0] = cp; | ||
| 92 | }, | ||
| 45 | 93 | ||
| 46 | 1 => { | 94 | 1 => { |
| 47 | if (!mem.eql(u8, field, "C") and !mem.eql(u8, field, "F")) continue :lines; | 95 | if (!mem.eql(u8, field, "C") and !mem.eql(u8, field, "F")) continue :lines; |