diff options
Diffstat (limited to 'codegen/fold.zig')
| -rw-r--r-- | codegen/fold.zig | 80 |
1 files changed, 49 insertions, 31 deletions
diff --git a/codegen/fold.zig b/codegen/fold.zig index ec024c5..24ecae6 100644 --- a/codegen/fold.zig +++ b/codegen/fold.zig | |||
| @@ -1,32 +1,66 @@ | |||
| 1 | const std = @import("std"); | 1 | const std = @import("std"); |
| 2 | const builtin = @import("builtin"); | 2 | const builtin = @import("builtin"); |
| 3 | 3 | const mem = std.mem; | |
| 4 | // From https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt | ||
| 5 | // const case_folding_txt = @embedFile("CaseFolding.txt"); | ||
| 6 | 4 | ||
| 7 | pub fn main() !void { | 5 | pub fn main() !void { |
| 8 | var gpa = std.heap.GeneralPurposeAllocator(.{}){}; | 6 | var gpa = std.heap.GeneralPurposeAllocator(.{}){}; |
| 9 | defer std.debug.assert(gpa.deinit() == .ok); | 7 | defer std.debug.assert(gpa.deinit() == .ok); |
| 10 | const allocator = gpa.allocator(); | 8 | const allocator = gpa.allocator(); |
| 11 | 9 | ||
| 12 | // const unbuf_stdout = std.io.getStdOut().writer(); | 10 | // Process DerivedCoreProperties.txt |
| 13 | // var buf_stdout = std.io.bufferedWriter(unbuf_stdout); | 11 | var props_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{}); |
| 14 | // const writer = buf_stdout.writer(); | 12 | defer props_file.close(); |
| 13 | var props_buf = std.io.bufferedReader(props_file.reader()); | ||
| 14 | const props_reader = props_buf.reader(); | ||
| 15 | |||
| 16 | var props_map = std.AutoHashMap(u21, void).init(allocator); | ||
| 17 | defer props_map.deinit(); | ||
| 18 | |||
| 19 | var line_buf: [4096]u8 = undefined; | ||
| 20 | |||
| 21 | props_lines: while (try props_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { | ||
| 22 | if (line.len == 0 or line[0] == '#') continue; | ||
| 23 | |||
| 24 | const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; | ||
| 25 | |||
| 26 | var field_iter = std.mem.tokenizeAny(u8, no_comment, "; "); | ||
| 27 | var current_code: [2]u21 = undefined; | ||
| 28 | |||
| 29 | var i: usize = 0; | ||
| 30 | while (field_iter.next()) |field| : (i += 1) { | ||
| 31 | switch (i) { | ||
| 32 | 0 => { | ||
| 33 | // Code point(s) | ||
| 34 | if (std.mem.indexOf(u8, field, "..")) |dots| { | ||
| 35 | current_code = .{ | ||
| 36 | try std.fmt.parseInt(u21, field[0..dots], 16), | ||
| 37 | try std.fmt.parseInt(u21, field[dots + 2 ..], 16), | ||
| 38 | }; | ||
| 39 | } else { | ||
| 40 | const code = try std.fmt.parseInt(u21, field, 16); | ||
| 41 | current_code = .{ code, code }; | ||
| 42 | } | ||
| 43 | }, | ||
| 44 | 1 => { | ||
| 45 | // Core property | ||
| 46 | if (!mem.eql(u8, field, "Changes_When_Casefolded")) continue :props_lines; | ||
| 47 | for (current_code[0]..current_code[1] + 1) |cp| try props_map.put(@intCast(cp), {}); | ||
| 48 | }, | ||
| 49 | else => {}, | ||
| 50 | } | ||
| 51 | } | ||
| 52 | } | ||
| 15 | 53 | ||
| 16 | var codepoint_mapping = std.AutoArrayHashMap(u21, [3]u21).init(allocator); | 54 | var codepoint_mapping = std.AutoArrayHashMap(u21, [3]u21).init(allocator); |
| 17 | defer codepoint_mapping.deinit(); | 55 | defer codepoint_mapping.deinit(); |
| 18 | 56 | ||
| 19 | // Process | 57 | // Process CaseFolding.txt |
| 20 | var cp_file = try std.fs.cwd().openFile("data/unicode/CaseFolding.txt", .{}); | 58 | var cp_file = try std.fs.cwd().openFile("data/unicode/CaseFolding.txt", .{}); |
| 21 | defer cp_file.close(); | 59 | defer cp_file.close(); |
| 22 | var cp_buf = std.io.bufferedReader(cp_file.reader()); | 60 | var cp_buf = std.io.bufferedReader(cp_file.reader()); |
| 23 | const cp_reader = cp_buf.reader(); | 61 | const cp_reader = cp_buf.reader(); |
| 24 | 62 | ||
| 25 | // var line_it = std.mem.tokenizeAny(u8, case_folding_txt, "\r\n"); | ||
| 26 | var line_buf: [4096]u8 = undefined; | ||
| 27 | |||
| 28 | while (try cp_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { | 63 | while (try cp_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { |
| 29 | // while (line_it.next()) |line| { | ||
| 30 | if (line.len == 0 or line[0] == '#') continue; | 64 | if (line.len == 0 or line[0] == '#') continue; |
| 31 | 65 | ||
| 32 | var field_it = std.mem.splitScalar(u8, line, ';'); | 66 | var field_it = std.mem.splitScalar(u8, line, ';'); |
| @@ -168,12 +202,6 @@ pub fn main() !void { | |||
| 168 | @memcpy(stage2[i * 256 ..][0..256], &key); | 202 | @memcpy(stage2[i * 256 ..][0..256], &key); |
| 169 | } | 203 | } |
| 170 | 204 | ||
| 171 | // try writer.print("const cutoff = 0x{X};\n", .{codepoint_cutoff}); | ||
| 172 | // try writeArray(writer, u8, "stage1", meaningful_stage1); | ||
| 173 | // try writeArray(writer, u8, "stage2", stage2); | ||
| 174 | // try writer.print("const multiple_start = {};\n", .{multiple_codepoint_start}); | ||
| 175 | // try writeArray(writer, i24, "stage3", stage3); | ||
| 176 | |||
| 177 | var args_iter = try std.process.argsWithAllocator(allocator); | 205 | var args_iter = try std.process.argsWithAllocator(allocator); |
| 178 | defer args_iter.deinit(); | 206 | defer args_iter.deinit(); |
| 179 | _ = args_iter.skip(); | 207 | _ = args_iter.skip(); |
| @@ -199,20 +227,10 @@ pub fn main() !void { | |||
| 199 | try writer.writeInt(u16, @intCast(stage3.len), endian); | 227 | try writer.writeInt(u16, @intCast(stage3.len), endian); |
| 200 | for (stage3) |offset| try writer.writeInt(i24, offset, endian); | 228 | for (stage3) |offset| try writer.writeInt(i24, offset, endian); |
| 201 | 229 | ||
| 230 | try writer.writeInt(u16, @intCast(props_map.count()), endian); | ||
| 231 | var iter = props_map.keyIterator(); | ||
| 232 | while (iter.next()) |key_ptr| try writer.writeInt(u24, key_ptr.*, endian); | ||
| 233 | |||
| 202 | try out_comp.flush(); | 234 | try out_comp.flush(); |
| 203 | } | 235 | } |
| 204 | |||
| 205 | // try buf_stdout.flush(); | ||
| 206 | } | 236 | } |
| 207 | |||
| 208 | // fn writeArray(writer: anytype, comptime T: type, name: []const u8, data: []const T) !void { | ||
| 209 | // try writer.print("const {s} = [{}]{s}{{", .{ name, data.len, @typeName(T) }); | ||
| 210 | // | ||
| 211 | // for (data, 0..) |v, i| { | ||
| 212 | // if (i % 32 == 0) try writer.writeAll("\n "); | ||
| 213 | // try writer.print("{},", .{v}); | ||
| 214 | // if (i != data.len - 1) try writer.writeByte(' '); | ||
| 215 | // } | ||
| 216 | // | ||
| 217 | // try writer.writeAll("\n};\n"); | ||
| 218 | // } | ||