summaryrefslogtreecommitdiff
path: root/src/gbp_gen.zig
diff options
context:
space:
mode:
Diffstat (limited to 'src/gbp_gen.zig')
-rw-r--r--src/gbp_gen.zig118
1 files changed, 97 insertions, 21 deletions
diff --git a/src/gbp_gen.zig b/src/gbp_gen.zig
index 7673931..afc54fc 100644
--- a/src/gbp_gen.zig
+++ b/src/gbp_gen.zig
@@ -34,20 +34,81 @@ const Prop = enum {
34 } 34 }
35}; 35};
36 36
37const block_size = 256;
38const Block = [block_size]u4;
39
40const BlockMap = std.HashMap(
41 Block,
42 u16,
43 struct {
44 pub fn hash(_: @This(), k: Block) u64 {
45 var hasher = std.hash.Wyhash.init(0);
46 std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
47 return hasher.final();
48 }
49
50 pub fn eql(_: @This(), a: Block, b: Block) bool {
51 return std.mem.eql(u4, &a, &b);
52 }
53 },
54 std.hash_map.default_max_load_percentage,
55);
56
37pub fn main() !void { 57pub fn main() !void {
38 var a = [_]?Prop{null} ** 1_114_112; 58 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
39 59 defer arena.deinit();
40 // for ('\u{0}'..'\u{10ffff}') |i| { 60 const allocator = arena.allocator();
41 for ('\u{0}'..'\u{10}') |i| { 61
42 const cp: u21 = @intCast(i); 62 var blocks_map = BlockMap.init(allocator);
43 const prop = Prop.forCodePoint(cp); 63 defer blocks_map.deinit();
44 if (prop == .none) continue; 64
45 a[cp] = prop; 65 const no_prop = std.math.maxInt(u16);
46 } 66
67 var stage1 = std.ArrayList(u16).init(allocator);
68 defer stage1.deinit();
69
70 var stage2 = std.ArrayList(u4).init(allocator);
71 defer stage2.deinit();
72
73 var stage3 = std.ArrayList(Prop).init(allocator);
74 defer stage3.deinit();
75
76 var block: Block = undefined;
77 var block_len: u16 = 0;
78
79 for (0..0x10ffff + 1) |cp| {
80 const prop = Prop.forCodePoint(@intCast(cp));
81
82 const block_idx = blk: {
83 for (stage3.items, 0..) |item, i| {
84 if (item == prop) break :blk i;
85 }
86
87 const idx = stage3.items.len;
88 try stage3.append(prop);
89 break :blk idx;
90 };
91
92 block[block_len] = @intCast(block_idx);
93 block_len += 1;
94
95 if (block_len < block_size and cp != 0x10ffff) continue;
96 if (block_len < block_size) @memset(block[block_len..block_size], 0);
47 97
48 const cp = '\u{10ffff}'; 98 const gop = try blocks_map.getOrPut(block);
49 const prop = Prop.forCodePoint(cp); 99 if (!gop.found_existing) {
50 if (prop != .none) a[cp] = prop; 100 gop.value_ptr.* = @intCast(stage2.items.len);
101 try stage2.appendSlice(block[0..block_len]);
102 }
103
104 if (prop == .none) {
105 try stage1.append(no_prop);
106 } else {
107 try stage1.append(gop.value_ptr.*);
108 }
109
110 block_len = 0;
111 }
51 112
52 var args_iter = std.process.args(); 113 var args_iter = std.process.args();
53 _ = args_iter.skip(); 114 _ = args_iter.skip();
@@ -59,6 +120,8 @@ pub fn main() !void {
59 const writer = out_buf.writer(); 120 const writer = out_buf.writer();
60 121
61 const prop_code = 122 const prop_code =
123 \\const std = @import("std");
124 \\
62 \\const Prop = enum { 125 \\const Prop = enum {
63 \\ none, 126 \\ none,
64 \\ 127 \\
@@ -79,20 +142,33 @@ pub fn main() !void {
79 142
80 try writer.writeAll(prop_code); 143 try writer.writeAll(prop_code);
81 144
82 try writer.writeAll("const array = [_]?Prop{"); 145 try writer.print("const stage_1 = [{}]u16{{", .{stage1.items.len});
83 for (&a, 0..) |v, i| { 146 for (stage1.items) |v| {
84 if (i != 0) try writer.writeByte(','); 147 _ = try writer.print("{},", .{v});
85 if (v) |p| { 148 }
86 _ = try writer.print(".{s}", .{@tagName(p)}); 149 try writer.writeAll("};\n");
87 } else { 150
88 try writer.writeAll("null"); 151 try writer.print("const stage_2 = [{}]u4{{", .{stage2.items.len});
89 } 152 for (stage2.items) |v| {
153 _ = try writer.print("{},", .{v});
154 }
155 try writer.writeAll("};\n");
156
157 try writer.print("const stage_3 = [{}]Prop{{", .{stage3.items.len});
158 for (stage3.items) |v| {
159 _ = try writer.print(".{s},", .{@tagName(v)});
90 } 160 }
91 try writer.writeAll("};\n"); 161 try writer.writeAll("};\n");
92 162
93 const code = 163 const code =
164 \\const no_prop = std.math.maxInt(u16);
165 \\
94 \\inline fn getProp(cp: u21) Prop { 166 \\inline fn getProp(cp: u21) Prop {
95 \\ return if (array[cp]) |prop| prop else .none; 167 \\ const stage_1_index = cp >> 8;
168 \\ if (stage_1[stage_1_index] == no_prop) return .none;
169 \\ const stage_2_index = stage_1[stage_1_index] + (cp & 0xff);
170 \\ const stage_3_index = stage_2[stage_2_index];
171 \\ return stage_3[stage_3_index];
96 \\} 172 \\}
97 \\ 173 \\
98 \\pub inline fn isControl(cp: u21) bool { 174 \\pub inline fn isControl(cp: u21) bool {