diff options
| author | 2024-02-13 11:13:22 -0400 | |
|---|---|---|
| committer | 2024-02-13 11:13:22 -0400 | |
| commit | 9e64e04221dde9ef3919e4962d225c08a77ca627 (patch) | |
| tree | 9513204c5fa24ac68731d46d608db4300b537abd | |
| parent | Flat array (diff) | |
| parent | Using no_prop to short lookup (diff) | |
| download | zg-9e64e04221dde9ef3919e4962d225c08a77ca627.tar.gz zg-9e64e04221dde9ef3919e4962d225c08a77ca627.tar.xz zg-9e64e04221dde9ef3919e4962d225c08a77ca627.zip | |
Merge table
| -rw-r--r-- | lang_mix.txt (renamed from src/lang_mix.txt) | 0 | ||||
| -rw-r--r-- | src/gbp_gen.zig | 118 | ||||
| -rw-r--r-- | src/main.zig | 8 |
3 files changed, 104 insertions, 22 deletions
diff --git a/src/lang_mix.txt b/lang_mix.txt index 6eec94a..6eec94a 100644 --- a/src/lang_mix.txt +++ b/lang_mix.txt | |||
diff --git a/src/gbp_gen.zig b/src/gbp_gen.zig index 7673931..afc54fc 100644 --- a/src/gbp_gen.zig +++ b/src/gbp_gen.zig | |||
| @@ -34,20 +34,81 @@ const Prop = enum { | |||
| 34 | } | 34 | } |
| 35 | }; | 35 | }; |
| 36 | 36 | ||
| 37 | const block_size = 256; | ||
| 38 | const Block = [block_size]u4; | ||
| 39 | |||
| 40 | const BlockMap = std.HashMap( | ||
| 41 | Block, | ||
| 42 | u16, | ||
| 43 | struct { | ||
| 44 | pub fn hash(_: @This(), k: Block) u64 { | ||
| 45 | var hasher = std.hash.Wyhash.init(0); | ||
| 46 | std.hash.autoHashStrat(&hasher, k, .DeepRecursive); | ||
| 47 | return hasher.final(); | ||
| 48 | } | ||
| 49 | |||
| 50 | pub fn eql(_: @This(), a: Block, b: Block) bool { | ||
| 51 | return std.mem.eql(u4, &a, &b); | ||
| 52 | } | ||
| 53 | }, | ||
| 54 | std.hash_map.default_max_load_percentage, | ||
| 55 | ); | ||
| 56 | |||
| 37 | pub fn main() !void { | 57 | pub fn main() !void { |
| 38 | var a = [_]?Prop{null} ** 1_114_112; | 58 | var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); |
| 39 | 59 | defer arena.deinit(); | |
| 40 | // for ('\u{0}'..'\u{10ffff}') |i| { | 60 | const allocator = arena.allocator(); |
| 41 | for ('\u{0}'..'\u{10}') |i| { | 61 | |
| 42 | const cp: u21 = @intCast(i); | 62 | var blocks_map = BlockMap.init(allocator); |
| 43 | const prop = Prop.forCodePoint(cp); | 63 | defer blocks_map.deinit(); |
| 44 | if (prop == .none) continue; | 64 | |
| 45 | a[cp] = prop; | 65 | const no_prop = std.math.maxInt(u16); |
| 46 | } | 66 | |
| 67 | var stage1 = std.ArrayList(u16).init(allocator); | ||
| 68 | defer stage1.deinit(); | ||
| 69 | |||
| 70 | var stage2 = std.ArrayList(u4).init(allocator); | ||
| 71 | defer stage2.deinit(); | ||
| 72 | |||
| 73 | var stage3 = std.ArrayList(Prop).init(allocator); | ||
| 74 | defer stage3.deinit(); | ||
| 75 | |||
| 76 | var block: Block = undefined; | ||
| 77 | var block_len: u16 = 0; | ||
| 78 | |||
| 79 | for (0..0x10ffff + 1) |cp| { | ||
| 80 | const prop = Prop.forCodePoint(@intCast(cp)); | ||
| 81 | |||
| 82 | const block_idx = blk: { | ||
| 83 | for (stage3.items, 0..) |item, i| { | ||
| 84 | if (item == prop) break :blk i; | ||
| 85 | } | ||
| 86 | |||
| 87 | const idx = stage3.items.len; | ||
| 88 | try stage3.append(prop); | ||
| 89 | break :blk idx; | ||
| 90 | }; | ||
| 91 | |||
| 92 | block[block_len] = @intCast(block_idx); | ||
| 93 | block_len += 1; | ||
| 94 | |||
| 95 | if (block_len < block_size and cp != 0x10ffff) continue; | ||
| 96 | if (block_len < block_size) @memset(block[block_len..block_size], 0); | ||
| 47 | 97 | ||
| 48 | const cp = '\u{10ffff}'; | 98 | const gop = try blocks_map.getOrPut(block); |
| 49 | const prop = Prop.forCodePoint(cp); | 99 | if (!gop.found_existing) { |
| 50 | if (prop != .none) a[cp] = prop; | 100 | gop.value_ptr.* = @intCast(stage2.items.len); |
| 101 | try stage2.appendSlice(block[0..block_len]); | ||
| 102 | } | ||
| 103 | |||
| 104 | if (prop == .none) { | ||
| 105 | try stage1.append(no_prop); | ||
| 106 | } else { | ||
| 107 | try stage1.append(gop.value_ptr.*); | ||
| 108 | } | ||
| 109 | |||
| 110 | block_len = 0; | ||
| 111 | } | ||
| 51 | 112 | ||
| 52 | var args_iter = std.process.args(); | 113 | var args_iter = std.process.args(); |
| 53 | _ = args_iter.skip(); | 114 | _ = args_iter.skip(); |
| @@ -59,6 +120,8 @@ pub fn main() !void { | |||
| 59 | const writer = out_buf.writer(); | 120 | const writer = out_buf.writer(); |
| 60 | 121 | ||
| 61 | const prop_code = | 122 | const prop_code = |
| 123 | \\const std = @import("std"); | ||
| 124 | \\ | ||
| 62 | \\const Prop = enum { | 125 | \\const Prop = enum { |
| 63 | \\ none, | 126 | \\ none, |
| 64 | \\ | 127 | \\ |
| @@ -79,20 +142,33 @@ pub fn main() !void { | |||
| 79 | 142 | ||
| 80 | try writer.writeAll(prop_code); | 143 | try writer.writeAll(prop_code); |
| 81 | 144 | ||
| 82 | try writer.writeAll("const array = [_]?Prop{"); | 145 | try writer.print("const stage_1 = [{}]u16{{", .{stage1.items.len}); |
| 83 | for (&a, 0..) |v, i| { | 146 | for (stage1.items) |v| { |
| 84 | if (i != 0) try writer.writeByte(','); | 147 | _ = try writer.print("{},", .{v}); |
| 85 | if (v) |p| { | 148 | } |
| 86 | _ = try writer.print(".{s}", .{@tagName(p)}); | 149 | try writer.writeAll("};\n"); |
| 87 | } else { | 150 | |
| 88 | try writer.writeAll("null"); | 151 | try writer.print("const stage_2 = [{}]u4{{", .{stage2.items.len}); |
| 89 | } | 152 | for (stage2.items) |v| { |
| 153 | _ = try writer.print("{},", .{v}); | ||
| 154 | } | ||
| 155 | try writer.writeAll("};\n"); | ||
| 156 | |||
| 157 | try writer.print("const stage_3 = [{}]Prop{{", .{stage3.items.len}); | ||
| 158 | for (stage3.items) |v| { | ||
| 159 | _ = try writer.print(".{s},", .{@tagName(v)}); | ||
| 90 | } | 160 | } |
| 91 | try writer.writeAll("};\n"); | 161 | try writer.writeAll("};\n"); |
| 92 | 162 | ||
| 93 | const code = | 163 | const code = |
| 164 | \\const no_prop = std.math.maxInt(u16); | ||
| 165 | \\ | ||
| 94 | \\inline fn getProp(cp: u21) Prop { | 166 | \\inline fn getProp(cp: u21) Prop { |
| 95 | \\ return if (array[cp]) |prop| prop else .none; | 167 | \\ const stage_1_index = cp >> 8; |
| 168 | \\ if (stage_1[stage_1_index] == no_prop) return .none; | ||
| 169 | \\ const stage_2_index = stage_1[stage_1_index] + (cp & 0xff); | ||
| 170 | \\ const stage_3_index = stage_2[stage_2_index]; | ||
| 171 | \\ return stage_3[stage_3_index]; | ||
| 96 | \\} | 172 | \\} |
| 97 | \\ | 173 | \\ |
| 98 | \\pub inline fn isControl(cp: u21) bool { | 174 | \\pub inline fn isControl(cp: u21) bool { |
diff --git a/src/main.zig b/src/main.zig index 5de7458..ca167e8 100644 --- a/src/main.zig +++ b/src/main.zig | |||
| @@ -2,9 +2,15 @@ const std = @import("std"); | |||
| 2 | 2 | ||
| 3 | // const GraphemeIterator = @import("ziglyph").GraphemeIterator; | 3 | // const GraphemeIterator = @import("ziglyph").GraphemeIterator; |
| 4 | const GraphemeIterator = @import("Grapheme.zig").GraphemeIterator; | 4 | const GraphemeIterator = @import("Grapheme.zig").GraphemeIterator; |
| 5 | const input = @embedFile("lang_mix.txt"); | ||
| 6 | 5 | ||
| 7 | pub fn main() !void { | 6 | pub fn main() !void { |
| 7 | var gpa = std.heap.GeneralPurposeAllocator(.{}){}; | ||
| 8 | defer _ = gpa.deinit(); | ||
| 9 | const allocator = gpa.allocator(); | ||
| 10 | |||
| 11 | const input = try std.fs.cwd().readFileAlloc(allocator, "lang_mix.txt", std.math.maxInt(u32)); | ||
| 12 | defer allocator.free(input); | ||
| 13 | |||
| 8 | var result: usize = 0; | 14 | var result: usize = 0; |
| 9 | var iter = GraphemeIterator.init(input); | 15 | var iter = GraphemeIterator.init(input); |
| 10 | 16 | ||