summary refs log tree commit diff
path: root/codegen/fold.zig
diff options
context:
space:
mode:
Diffstat (limited to 'codegen/fold.zig')
-rw-r--r-- codegen/fold.zig 54
1 files changed, 51 insertions, 3 deletions
diff --git a/codegen/fold.zig b/codegen/fold.zig
index 7977e61..b3192e7 100644
--- a/codegen/fold.zig
+++ b/codegen/fold.zig
@@ -8,7 +8,51 @@ pub fn main() !void {
8 defer arena.deinit(); 8 defer arena.deinit();
9 const allocator = arena.allocator(); 9 const allocator = arena.allocator();
10 10
11 // Process DerivedEastAsianWidth.txt 11 // Process DerivedCoreProperties.txt
12 var cp_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{});
13 defer cp_file.close();
14 var cp_buf = std.io.bufferedReader(cp_file.reader());
15 const cp_reader = cp_buf.reader();
16
17 var cp_map = std.AutoHashMap(u21, void).init(allocator);
18 defer cp_map.deinit();
19
20 var line_buf: [4096]u8 = undefined;
21
22 cp_lines: while (try cp_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
23 if (line.len == 0 or line[0] == '#') continue;
24
25 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
26
27 var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
28 var current_code: [2]u21 = undefined;
29
30 var i: usize = 0;
31 while (field_iter.next()) |field| : (i += 1) {
32 switch (i) {
33 0 => {
34 // Code point(s)
35 if (std.mem.indexOf(u8, field, "..")) |dots| {
36 current_code = .{
37 try std.fmt.parseInt(u21, field[0..dots], 16),
38 try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
39 };
40 } else {
41 const code = try std.fmt.parseInt(u21, field, 16);
42 current_code = .{ code, code };
43 }
44 },
45 1 => {
46 // Core property
47 if (!mem.eql(u8, field, "Changes_When_Casefolded")) continue :cp_lines;
48 for (current_code[0]..current_code[1] + 1) |cp| try cp_map.put(@intCast(cp), {});
49 },
50 else => {},
51 }
52 }
53 }
54
55 // Process CaseFolding.txt
12 var in_file = try std.fs.cwd().openFile("data/unicode/CaseFolding.txt", .{}); 56 var in_file = try std.fs.cwd().openFile("data/unicode/CaseFolding.txt", .{});
13 defer in_file.close(); 57 defer in_file.close();
14 var in_buf = std.io.bufferedReader(in_file.reader()); 58 var in_buf = std.io.bufferedReader(in_file.reader());
@@ -27,7 +71,6 @@ pub fn main() !void {
27 const writer = out_comp.writer(); 71 const writer = out_comp.writer();
28 72
29 const endian = builtin.cpu.arch.endian(); 73 const endian = builtin.cpu.arch.endian();
30 var line_buf: [4096]u8 = undefined;
31 74
32 lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { 75 lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
33 if (line.len == 0 or line[0] == '#') continue; 76 if (line.len == 0 or line[0] == '#') continue;
@@ -41,7 +84,12 @@ pub fn main() !void {
41 var i: usize = 0; 84 var i: usize = 0;
42 while (field_iter.next()) |field| : (i += 1) { 85 while (field_iter.next()) |field| : (i += 1) {
43 switch (i) { 86 switch (i) {
44 0 => cps[0] = try fmt.parseInt(u24, field, 16), 87 0 => {
88 var cp = try fmt.parseInt(u21, field, 16);
89 cp <<= 1;
90 if (cp_map.contains(cp)) cp |= 1;
91 cps[0] = cp;
92 },
45 93
46 1 => { 94 1 => {
47 if (!mem.eql(u8, field, "C") and !mem.eql(u8, field, "F")) continue :lines; 95 if (!mem.eql(u8, field, "C") and !mem.eql(u8, field, "F")) continue :lines;