summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jose Colon Rodriguez 2024-02-13 11:13:22 -0400
committer Jose Colon Rodriguez 2024-02-13 11:13:22 -0400
commit 9e64e04221dde9ef3919e4962d225c08a77ca627 (patch)
tree 9513204c5fa24ac68731d46d608db4300b537abd
parent Flat array (diff)
parent Using no_prop to short lookup (diff)
download zg-9e64e04221dde9ef3919e4962d225c08a77ca627.tar.gz
zg-9e64e04221dde9ef3919e4962d225c08a77ca627.tar.xz
zg-9e64e04221dde9ef3919e4962d225c08a77ca627.zip
Merge table
-rw-r--r-- lang_mix.txt (renamed from src/lang_mix.txt) 0
-rw-r--r-- src/gbp_gen.zig 118
-rw-r--r-- src/main.zig 8
3 files changed, 104 insertions, 22 deletions
diff --git a/src/lang_mix.txt b/lang_mix.txt
index 6eec94a..6eec94a 100644
--- a/src/lang_mix.txt
+++ b/lang_mix.txt
diff --git a/src/gbp_gen.zig b/src/gbp_gen.zig
index 7673931..afc54fc 100644
--- a/src/gbp_gen.zig
+++ b/src/gbp_gen.zig
@@ -34,20 +34,81 @@ const Prop = enum {
34 } 34 }
35}; 35};
36 36
37const block_size = 256;
38const Block = [block_size]u4;
39
40const BlockMap = std.HashMap(
41 Block,
42 u16,
43 struct {
44 pub fn hash(_: @This(), k: Block) u64 {
45 var hasher = std.hash.Wyhash.init(0);
46 std.hash.autoHashStrat(&hasher, k, .DeepRecursive);
47 return hasher.final();
48 }
49
50 pub fn eql(_: @This(), a: Block, b: Block) bool {
51 return std.mem.eql(u4, &a, &b);
52 }
53 },
54 std.hash_map.default_max_load_percentage,
55);
56
37pub fn main() !void { 57pub fn main() !void {
38 var a = [_]?Prop{null} ** 1_114_112; 58 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
39 59 defer arena.deinit();
40 // for ('\u{0}'..'\u{10ffff}') |i| { 60 const allocator = arena.allocator();
41 for ('\u{0}'..'\u{10}') |i| { 61
42 const cp: u21 = @intCast(i); 62 var blocks_map = BlockMap.init(allocator);
43 const prop = Prop.forCodePoint(cp); 63 defer blocks_map.deinit();
44 if (prop == .none) continue; 64
45 a[cp] = prop; 65 const no_prop = std.math.maxInt(u16);
46 } 66
67 var stage1 = std.ArrayList(u16).init(allocator);
68 defer stage1.deinit();
69
70 var stage2 = std.ArrayList(u4).init(allocator);
71 defer stage2.deinit();
72
73 var stage3 = std.ArrayList(Prop).init(allocator);
74 defer stage3.deinit();
75
76 var block: Block = undefined;
77 var block_len: u16 = 0;
78
79 for (0..0x10ffff + 1) |cp| {
80 const prop = Prop.forCodePoint(@intCast(cp));
81
82 const block_idx = blk: {
83 for (stage3.items, 0..) |item, i| {
84 if (item == prop) break :blk i;
85 }
86
87 const idx = stage3.items.len;
88 try stage3.append(prop);
89 break :blk idx;
90 };
91
92 block[block_len] = @intCast(block_idx);
93 block_len += 1;
94
95 if (block_len < block_size and cp != 0x10ffff) continue;
96 if (block_len < block_size) @memset(block[block_len..block_size], 0);
47 97
48 const cp = '\u{10ffff}'; 98 const gop = try blocks_map.getOrPut(block);
49 const prop = Prop.forCodePoint(cp); 99 if (!gop.found_existing) {
50 if (prop != .none) a[cp] = prop; 100 gop.value_ptr.* = @intCast(stage2.items.len);
101 try stage2.appendSlice(block[0..block_len]);
102 }
103
104 if (prop == .none) {
105 try stage1.append(no_prop);
106 } else {
107 try stage1.append(gop.value_ptr.*);
108 }
109
110 block_len = 0;
111 }
51 112
52 var args_iter = std.process.args(); 113 var args_iter = std.process.args();
53 _ = args_iter.skip(); 114 _ = args_iter.skip();
@@ -59,6 +120,8 @@ pub fn main() !void {
59 const writer = out_buf.writer(); 120 const writer = out_buf.writer();
60 121
61 const prop_code = 122 const prop_code =
123 \\const std = @import("std");
124 \\
62 \\const Prop = enum { 125 \\const Prop = enum {
63 \\ none, 126 \\ none,
64 \\ 127 \\
@@ -79,20 +142,33 @@ pub fn main() !void {
79 142
80 try writer.writeAll(prop_code); 143 try writer.writeAll(prop_code);
81 144
82 try writer.writeAll("const array = [_]?Prop{"); 145 try writer.print("const stage_1 = [{}]u16{{", .{stage1.items.len});
83 for (&a, 0..) |v, i| { 146 for (stage1.items) |v| {
84 if (i != 0) try writer.writeByte(','); 147 _ = try writer.print("{},", .{v});
85 if (v) |p| { 148 }
86 _ = try writer.print(".{s}", .{@tagName(p)}); 149 try writer.writeAll("};\n");
87 } else { 150
88 try writer.writeAll("null"); 151 try writer.print("const stage_2 = [{}]u4{{", .{stage2.items.len});
89 } 152 for (stage2.items) |v| {
153 _ = try writer.print("{},", .{v});
154 }
155 try writer.writeAll("};\n");
156
157 try writer.print("const stage_3 = [{}]Prop{{", .{stage3.items.len});
158 for (stage3.items) |v| {
159 _ = try writer.print(".{s},", .{@tagName(v)});
90 } 160 }
91 try writer.writeAll("};\n"); 161 try writer.writeAll("};\n");
92 162
93 const code = 163 const code =
164 \\const no_prop = std.math.maxInt(u16);
165 \\
94 \\inline fn getProp(cp: u21) Prop { 166 \\inline fn getProp(cp: u21) Prop {
95 \\ return if (array[cp]) |prop| prop else .none; 167 \\ const stage_1_index = cp >> 8;
168 \\ if (stage_1[stage_1_index] == no_prop) return .none;
169 \\ const stage_2_index = stage_1[stage_1_index] + (cp & 0xff);
170 \\ const stage_3_index = stage_2[stage_2_index];
171 \\ return stage_3[stage_3_index];
96 \\} 172 \\}
97 \\ 173 \\
98 \\pub inline fn isControl(cp: u21) bool { 174 \\pub inline fn isControl(cp: u21) bool {
diff --git a/src/main.zig b/src/main.zig
index 5de7458..ca167e8 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -2,9 +2,15 @@ const std = @import("std");
2 2
3// const GraphemeIterator = @import("ziglyph").GraphemeIterator; 3// const GraphemeIterator = @import("ziglyph").GraphemeIterator;
4const GraphemeIterator = @import("Grapheme.zig").GraphemeIterator; 4const GraphemeIterator = @import("Grapheme.zig").GraphemeIterator;
5const input = @embedFile("lang_mix.txt");
6 5
7pub fn main() !void { 6pub fn main() !void {
7 var gpa = std.heap.GeneralPurposeAllocator(.{}){};
8 defer _ = gpa.deinit();
9 const allocator = gpa.allocator();
10
11 const input = try std.fs.cwd().readFileAlloc(allocator, "lang_mix.txt", std.math.maxInt(u32));
12 defer allocator.free(input);
13
8 var result: usize = 0; 14 var result: usize = 0;
9 var iter = GraphemeIterator.init(input); 15 var iter = GraphemeIterator.init(input);
10 16