summaryrefslogtreecommitdiff
path: root/codegen
diff options
context:
space:
mode:
Diffstat (limited to 'codegen')
-rw-r--r--codegen/ccc.zig24
-rw-r--r--codegen/compat.zig121
-rw-r--r--codegen/fold.zig57
-rw-r--r--codegen/hangul.zig24
-rw-r--r--codegen/normp.zig24
5 files changed, 193 insertions, 57 deletions
diff --git a/codegen/ccc.zig b/codegen/ccc.zig
index 4e470ae..e76222f 100644
--- a/codegen/ccc.zig
+++ b/codegen/ccc.zig
@@ -112,12 +112,24 @@ pub fn main() anyerror!void {
112 defer out_file.close(); 112 defer out_file.close();
113 var writer = out_file.writer(&write_buf); 113 var writer = out_file.writer(&write_buf);
114 114
115 const endian = builtin.cpu.arch.endian(); 115 try writer.interface.print(
116 try writer.interface.writeInt(u16, @intCast(stage1.items.len), endian); 116 \\//! This file is auto-generated. Do not edit.
117 for (stage1.items) |i| try writer.interface.writeInt(u16, i, endian); 117 \\
118 118 \\pub const s1: [{}]u16 = .{{
119 try writer.interface.writeInt(u16, @intCast(stage2.items.len), endian); 119 , .{stage1.items.len});
120 try writer.interface.writeAll(stage2.items); 120 for (stage1.items) |entry| try writer.interface.print("{}, ", .{entry});
121
122 try writer.interface.print(
123 \\
124 \\}};
125 \\
126 \\pub const s2: [{}]u8 = .{{
127 , .{stage2.items.len});
128 for (stage2.items) |entry| try writer.interface.print("{}, ", .{entry});
129
130 try writer.interface.writeAll(
131 \\};
132 );
121 133
122 try writer.interface.flush(); 134 try writer.interface.flush();
123} 135}
diff --git a/codegen/compat.zig b/codegen/compat.zig
index debb83d..a9d1f92 100644
--- a/codegen/compat.zig
+++ b/codegen/compat.zig
@@ -1,58 +1,82 @@
1const std = @import("std"); 1const std = @import("std");
2const builtin = @import("builtin"); 2const builtin = @import("builtin");
3 3
4const block_size = 256;
5const Block = [block_size][]const u21;
6
7const BlockMap = std.HashMap(
8 Block,
9 u16,
10 struct {
11 pub fn hash(_: @This(), k: Block) u64 {
12 var hasher = std.hash.Wyhash.init(0);
13 std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
14 return hasher.final();
15 }
16
17 pub fn eql(_: @This(), aBlock: Block, bBlock: Block) bool {
18 return for (aBlock, bBlock) |a, b| {
19 if (a.len != b.len) return false;
20 for (a, b) |a_cp, b_cp| {
21 if (a_cp != b_cp) return false;
22 }
23 } else true;
24 }
25 },
26 std.hash_map.default_max_load_percentage,
27);
28
4pub fn main() anyerror!void { 29pub fn main() anyerror!void {
5 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); 30 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
6 defer arena.deinit(); 31 defer arena.deinit();
7 const allocator = arena.allocator(); 32 const allocator = arena.allocator();
8 33
9 // Process UnicodeData.txt 34 // Process UnicodeData.txt
10 var write_buf: [4096]u8 = undefined;
11
12 var in_reader = std.io.Reader.fixed(@embedFile("UnicodeData.txt")); 35 var in_reader = std.io.Reader.fixed(@embedFile("UnicodeData.txt"));
13 var args_iter = try std.process.argsWithAllocator(allocator); 36 var args_iter = try std.process.argsWithAllocator(allocator);
14 defer args_iter.deinit(); 37 defer args_iter.deinit();
15 _ = args_iter.skip(); 38 _ = args_iter.skip();
16 const output_path = args_iter.next() orelse @panic("No output file arg!"); 39 const output_path = args_iter.next() orelse @panic("No output file arg!");
17 40
18 var out_file = try std.fs.cwd().createFile(output_path, .{}); 41 var compat_map = std.AutoHashMap(u21, []u21).init(allocator);
19 defer out_file.close(); 42 defer compat_map.deinit();
20 var writer = out_file.writer(&write_buf);
21 43
22 const endian = builtin.cpu.arch.endian(); 44 while (in_reader.takeDelimiterInclusive('\n')) |line| {
23
24 lines: while (in_reader.takeDelimiterInclusive('\n')) |took| {
25 const line = std.mem.trimRight(u8, took, "\n");
26 if (line.len == 0) continue; 45 if (line.len == 0) continue;
27 46
28 var field_iter = std.mem.splitScalar(u8, line, ';'); 47 var field_iter = std.mem.splitScalar(u8, line, ';');
29 var cps: [19]u24 = undefined; 48 var cp: u21 = undefined;
30 var len: u8 = 1;
31 49
32 var i: usize = 0; 50 var i: usize = 0;
33 while (field_iter.next()) |field| : (i += 1) { 51 while (field_iter.next()) |field| : (i += 1) {
52 if (field.len == 0) continue;
53
34 switch (i) { 54 switch (i) {
35 0 => cps[0] = try std.fmt.parseInt(u24, field, 16), 55 0 => {
56 cp = try std.fmt.parseInt(u21, field, 16);
57 },
36 58
37 5 => { 59 5 => {
38 // Not compatibility. 60 // Not compatibility.
39 if (field.len == 0 or field[0] != '<') continue :lines; 61 if (field[0] != '<') continue;
62
40 var cp_iter = std.mem.tokenizeScalar(u8, field, ' '); 63 var cp_iter = std.mem.tokenizeScalar(u8, field, ' ');
41 _ = cp_iter.next(); // <compat type> 64 _ = cp_iter.next(); // <compat type>
42 65
66 var cps: [18]u21 = undefined;
67 var len: u8 = 0;
68
43 while (cp_iter.next()) |cp_str| : (len += 1) { 69 while (cp_iter.next()) |cp_str| : (len += 1) {
44 cps[len] = try std.fmt.parseInt(u24, cp_str, 16); 70 cps[len] = try std.fmt.parseInt(u21, cp_str, 16);
45 } 71 }
46 },
47 72
48 2 => if (line[0] == '<') continue :lines, 73 const slice = try allocator.dupe(u21, cps[0..len]);
74 try compat_map.put(cp, slice);
75 },
49 76
50 else => {}, 77 else => {},
51 } 78 }
52 } 79 }
53
54 try writer.interface.writeInt(u8, @intCast(len), endian);
55 for (cps[0..len]) |cp| try writer.interface.writeInt(u24, cp, endian);
56 } else |err| switch (err) { 80 } else |err| switch (err) {
57 error.EndOfStream => {}, 81 error.EndOfStream => {},
58 else => { 82 else => {
@@ -60,6 +84,63 @@ pub fn main() anyerror!void {
60 }, 84 },
61 } 85 }
62 86
63 try writer.interface.writeInt(u16, 0, endian); 87 // Build multi-tiered lookup tables for compatibility decompositions
88 var blocks_map = BlockMap.init(allocator);
89 defer blocks_map.deinit();
90
91 var stage1 = std.array_list.Managed(u16).init(allocator);
92 defer stage1.deinit();
93
94 var stage2 = std.array_list.Managed([]const u21).init(allocator);
95 defer stage2.deinit();
96
97 var block: Block = [_][]const u21{&[_]u21{}} ** block_size;
98 var block_len: u16 = 0;
99
100 for (0..0x110000) |i| {
101 const cp: u21 = @intCast(i);
102 const compat: []const u21 = compat_map.get(cp) orelse &[_]u21{};
103
104 block[block_len] = compat;
105 block_len += 1;
106
107 if (block_len < block_size and cp != 0x10ffff) continue;
108
109 const gop = try blocks_map.getOrPut(block);
110 if (!gop.found_existing) {
111 gop.value_ptr.* = @intCast(stage2.items.len);
112 try stage2.appendSlice(&block);
113 }
114
115 try stage1.append(gop.value_ptr.*);
116 block_len = 0;
117 }
118 // Write out
119 var write_buf: [4096]u8 = undefined;
120 var out_file = try std.fs.cwd().createFile(output_path, .{});
121 defer out_file.close();
122 var writer = out_file.writer(&write_buf);
123
124 try writer.interface.print(
125 \\//! This file is auto-generated. Do not edit.
126 \\
127 \\pub const s1: [{}]u16 = .{{
128 , .{stage1.items.len});
129 for (stage1.items) |entry| try writer.interface.print("{}, ", .{entry});
130
131 try writer.interface.print(
132 \\
133 \\}};
134 \\
135 \\pub const s2: [{}][]const u21 = .{{
136 , .{stage2.items.len});
137 for (stage2.items) |entry| {
138 try writer.interface.print("&.{any}, ", .{entry});
139 }
140
141 try writer.interface.writeAll(
142 \\};
143 );
144
64 try writer.interface.flush(); 145 try writer.interface.flush();
65} 146}
diff --git a/codegen/fold.zig b/codegen/fold.zig
index 366ed79..c5f54eb 100644
--- a/codegen/fold.zig
+++ b/codegen/fold.zig
@@ -228,26 +228,45 @@ pub fn main() anyerror!void {
228 var out_file = try std.fs.cwd().createFile(output_path, .{}); 228 var out_file = try std.fs.cwd().createFile(output_path, .{});
229 defer out_file.close(); 229 defer out_file.close();
230 var writer = out_file.writer(&write_buf); 230 var writer = out_file.writer(&write_buf);
231
232 const endian = builtin.cpu.arch.endian();
233 // Table metadata. 231 // Table metadata.
234 try writer.interface.writeInt(u24, @intCast(codepoint_cutoff), endian); 232 try writer.interface.print(
235 try writer.interface.writeInt(u24, @intCast(multiple_codepoint_start), endian); 233 \\//! This file is auto-generated. Do not edit.
236 // Stage 1 234 \\
237 try writer.interface.writeInt(u16, @intCast(meaningful_stage1.len), endian); 235 \\pub const cutoff: u21 = {};
238 try writer.interface.writeAll(meaningful_stage1); 236 \\pub const cwcf_exceptions_min: u21 = {};
239 // Stage 2 237 \\pub const cwcf_exceptions_max: u21 = {};
240 try writer.interface.writeInt(u16, @intCast(stage2.len), endian); 238 \\pub const cwcf_exceptions: [{}]u21 = .{{
241 try writer.interface.writeAll(stage2); 239 , .{ codepoint_cutoff, std.mem.min(u21, changes_when_casefolded_exceptions.items), std.mem.max(u21, changes_when_casefolded_exceptions.items), changes_when_casefolded_exceptions.items.len });
242 // Stage 3 240 for (changes_when_casefolded_exceptions.items) |cp| try writer.interface.print("{}, ", .{cp});
243 try writer.interface.writeInt(u16, @intCast(stage3.len), endian); 241
244 for (stage3) |offset| try writer.interface.writeInt(i24, offset, endian); 242 try writer.interface.print(
245 // Changes when case folded 243 \\
246 // Min and max 244 \\}};
247 try writer.interface.writeInt(u24, std.mem.min(u21, changes_when_casefolded_exceptions.items), endian); 245 \\
248 try writer.interface.writeInt(u24, std.mem.max(u21, changes_when_casefolded_exceptions.items), endian); 246 \\pub const multiple_start: u21 = {};
249 try writer.interface.writeInt(u16, @intCast(changes_when_casefolded_exceptions.items.len), endian); 247 \\pub const stage1: [{}]u8 = .{{
250 for (changes_when_casefolded_exceptions.items) |cp| try writer.interface.writeInt(u24, cp, endian); 248 , .{ multiple_codepoint_start, meaningful_stage1.len });
249 for (meaningful_stage1) |entry| try writer.interface.print("{}, ", .{entry});
250
251 try writer.interface.print(
252 \\
253 \\}};
254 \\
255 \\pub const stage2: [{}]u8 = .{{
256 , .{stage2.len});
257 for (stage2) |entry| try writer.interface.print("{}, ", .{entry});
258
259 try writer.interface.print(
260 \\
261 \\}};
262 \\
263 \\pub const stage3: [{}]i24 = .{{
264 , .{stage3.len});
265 for (stage3) |entry| try writer.interface.print("{}, ", .{entry});
266
267 try writer.interface.writeAll(
268 \\};
269 );
251 270
252 try writer.interface.flush(); 271 try writer.interface.flush();
253 } 272 }
diff --git a/codegen/hangul.zig b/codegen/hangul.zig
index 2e4c175..d7504a9 100644
--- a/codegen/hangul.zig
+++ b/codegen/hangul.zig
@@ -120,12 +120,24 @@ pub fn main() anyerror!void {
120 defer out_file.close(); 120 defer out_file.close();
121 var writer = out_file.writer(&write_buf); 121 var writer = out_file.writer(&write_buf);
122 122
123 const endian = builtin.cpu.arch.endian(); 123 try writer.interface.print(
124 try writer.interface.writeInt(u16, @intCast(stage1.items.len), endian); 124 \\//! This file is auto-generated. Do not edit.
125 for (stage1.items) |i| try writer.interface.writeInt(u16, i, endian); 125 \\
126 126 \\pub const s1: [{}]u16 = .{{
127 try writer.interface.writeInt(u16, @intCast(stage2.items.len), endian); 127 , .{stage1.items.len});
128 for (stage2.items) |i| try writer.interface.writeInt(u8, i, endian); 128 for (stage1.items) |entry| try writer.interface.print("{}, ", .{entry});
129
130 try writer.interface.print(
131 \\
132 \\}};
133 \\
134 \\pub const s2: [{}]u3 = .{{
135 , .{stage2.items.len});
136 for (stage2.items) |entry| try writer.interface.print("{}, ", .{entry});
137
138 try writer.interface.writeAll(
139 \\};
140 );
129 141
130 try writer.interface.flush(); 142 try writer.interface.flush();
131} 143}
diff --git a/codegen/normp.zig b/codegen/normp.zig
index eaf6989..343f03e 100644
--- a/codegen/normp.zig
+++ b/codegen/normp.zig
@@ -121,12 +121,24 @@ pub fn main() anyerror!void {
121 defer out_file.close(); 121 defer out_file.close();
122 var writer = out_file.writer(&write_buf); 122 var writer = out_file.writer(&write_buf);
123 123
124 const endian = builtin.cpu.arch.endian(); 124 try writer.interface.print(
125 try writer.interface.writeInt(u16, @intCast(stage1.items.len), endian); 125 \\//! This file is auto-generated. Do not edit.
126 for (stage1.items) |i| try writer.interface.writeInt(u16, i, endian); 126 \\
127 127 \\pub const s1: [{}]u16 = .{{
128 try writer.interface.writeInt(u16, @intCast(stage2.items.len), endian); 128 , .{stage1.items.len});
129 for (stage2.items) |i| try writer.interface.writeInt(u8, i, endian); 129 for (stage1.items) |entry| try writer.interface.print("{}, ", .{entry});
130
131 try writer.interface.print(
132 \\
133 \\}};
134 \\
135 \\pub const s2: [{}]u3 = .{{
136 , .{stage2.items.len});
137 for (stage2.items) |entry| try writer.interface.print("{}, ", .{entry});
138
139 try writer.interface.writeAll(
140 \\};
141 );
130 142
131 try writer.interface.flush(); 143 try writer.interface.flush();
132} 144}