diff options
| author | 2024-02-13 14:23:34 -0400 | |
|---|---|---|
| committer | 2024-02-13 14:23:34 -0400 | |
| commit | 005f2a30036ca5328ae0fffcd61749f2de2d0a7c (patch) | |
| tree | f502f0d9f5979d34158b8632c1fa3c6a18872a28 | |
| parent | Passing ziglyph tests (diff) | |
| download | zg-005f2a30036ca5328ae0fffcd61749f2de2d0a7c.tar.gz zg-005f2a30036ca5328ae0fffcd61749f2de2d0a7c.tar.xz zg-005f2a30036ca5328ae0fffcd61749f2de2d0a7c.zip | |
Using emoji table
| -rw-r--r-- | build.zig | 26 | ||||
| -rw-r--r-- | src/Grapheme.zig | 3 | ||||
| -rw-r--r-- | src/emoji_gen.zig | 93 |
3 files changed, 114 insertions, 8 deletions
| @@ -6,15 +6,25 @@ pub fn build(b: *std.Build) void { | |||
| 6 | 6 | ||
| 7 | const ziglyph = b.dependency("ziglyph", .{}); | 7 | const ziglyph = b.dependency("ziglyph", .{}); |
| 8 | 8 | ||
| 9 | const gen_exe = b.addExecutable(.{ | 9 | const gbp_gen_exe = b.addExecutable(.{ |
| 10 | .name = "gen", | 10 | .name = "gbp_gen", |
| 11 | .root_source_file = .{ .path = "src/gbp_gen.zig" }, | 11 | .root_source_file = .{ .path = "src/gbp_gen.zig" }, |
| 12 | .target = target, | 12 | .target = target, |
| 13 | .optimize = optimize, | 13 | .optimize = optimize, |
| 14 | }); | 14 | }); |
| 15 | gen_exe.root_module.addImport("ziglyph", ziglyph.module("ziglyph")); | 15 | gbp_gen_exe.root_module.addImport("ziglyph", ziglyph.module("ziglyph")); |
| 16 | const run_gen_exe = b.addRunArtifact(gen_exe); | 16 | const run_gbp_gen_exe = b.addRunArtifact(gbp_gen_exe); |
| 17 | const gen_out = run_gen_exe.addOutputFileArg("gbp.zig"); | 17 | const gbp_gen_out = run_gbp_gen_exe.addOutputFileArg("gbp.zig"); |
| 18 | |||
| 19 | const emoji_gen_exe = b.addExecutable(.{ | ||
| 20 | .name = "emoji_gen", | ||
| 21 | .root_source_file = .{ .path = "src/emoji_gen.zig" }, | ||
| 22 | .target = target, | ||
| 23 | .optimize = optimize, | ||
| 24 | }); | ||
| 25 | emoji_gen_exe.root_module.addImport("ziglyph", ziglyph.module("ziglyph")); | ||
| 26 | const run_emoji_gen_exe = b.addRunArtifact(emoji_gen_exe); | ||
| 27 | const emoji_gen_out = run_emoji_gen_exe.addOutputFileArg("emoji.zig"); | ||
| 18 | 28 | ||
| 19 | const exe = b.addExecutable(.{ | 29 | const exe = b.addExecutable(.{ |
| 20 | .name = "zgbench", | 30 | .name = "zgbench", |
| @@ -23,7 +33,8 @@ pub fn build(b: *std.Build) void { | |||
| 23 | .optimize = optimize, | 33 | .optimize = optimize, |
| 24 | }); | 34 | }); |
| 25 | exe.root_module.addImport("ziglyph", ziglyph.module("ziglyph")); | 35 | exe.root_module.addImport("ziglyph", ziglyph.module("ziglyph")); |
| 26 | exe.root_module.addAnonymousImport("gbp", .{ .root_source_file = gen_out }); | 36 | exe.root_module.addAnonymousImport("gbp", .{ .root_source_file = gbp_gen_out }); |
| 37 | exe.root_module.addAnonymousImport("emoji", .{ .root_source_file = emoji_gen_out }); | ||
| 27 | b.installArtifact(exe); | 38 | b.installArtifact(exe); |
| 28 | 39 | ||
| 29 | const run_cmd = b.addRunArtifact(exe); | 40 | const run_cmd = b.addRunArtifact(exe); |
| @@ -39,7 +50,8 @@ pub fn build(b: *std.Build) void { | |||
| 39 | .optimize = optimize, | 50 | .optimize = optimize, |
| 40 | }); | 51 | }); |
| 41 | exe_unit_tests.root_module.addImport("ziglyph", ziglyph.module("ziglyph")); | 52 | exe_unit_tests.root_module.addImport("ziglyph", ziglyph.module("ziglyph")); |
| 42 | exe_unit_tests.root_module.addAnonymousImport("gbp", .{ .root_source_file = gen_out }); | 53 | exe_unit_tests.root_module.addAnonymousImport("gbp", .{ .root_source_file = gbp_gen_out }); |
| 54 | exe_unit_tests.root_module.addAnonymousImport("emoji", .{ .root_source_file = emoji_gen_out }); | ||
| 43 | 55 | ||
| 44 | const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests); | 56 | const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests); |
| 45 | 57 | ||
diff --git a/src/Grapheme.zig b/src/Grapheme.zig index 1e9606f..1165a0e 100644 --- a/src/Grapheme.zig +++ b/src/Grapheme.zig | |||
| @@ -7,9 +7,10 @@ const ziglyph = @import("ziglyph"); | |||
| 7 | const CodePoint = ziglyph.CodePoint; | 7 | const CodePoint = ziglyph.CodePoint; |
| 8 | const CodePointIterator = CodePoint.CodePointIterator; | 8 | const CodePointIterator = CodePoint.CodePointIterator; |
| 9 | const readCodePoint = CodePoint.readCodePoint; | 9 | const readCodePoint = CodePoint.readCodePoint; |
| 10 | const emoji = ziglyph.emoji; | 10 | // const emoji = ziglyph.emoji; |
| 11 | // const gbp = ziglyph.grapheme_break; | 11 | // const gbp = ziglyph.grapheme_break; |
| 12 | const gbp = @import("gbp"); | 12 | const gbp = @import("gbp"); |
| 13 | const emoji = @import("emoji"); | ||
| 13 | 14 | ||
| 14 | pub const Grapheme = @This(); | 15 | pub const Grapheme = @This(); |
| 15 | 16 | ||
diff --git a/src/emoji_gen.zig b/src/emoji_gen.zig new file mode 100644 index 0000000..acad0ca --- /dev/null +++ b/src/emoji_gen.zig | |||
| @@ -0,0 +1,93 @@ | |||
| 1 | const std = @import("std"); | ||
| 2 | |||
| 3 | const emoji = @import("ziglyph").emoji; | ||
| 4 | |||
| 5 | const block_size = 256; | ||
| 6 | const Block = [block_size]bool; | ||
| 7 | |||
| 8 | const BlockMap = std.HashMap( | ||
| 9 | Block, | ||
| 10 | u16, | ||
| 11 | struct { | ||
| 12 | pub fn hash(_: @This(), k: Block) u64 { | ||
| 13 | var hasher = std.hash.Wyhash.init(0); | ||
| 14 | std.hash.autoHashStrat(&hasher, k, .DeepRecursive); | ||
| 15 | return hasher.final(); | ||
| 16 | } | ||
| 17 | |||
| 18 | pub fn eql(_: @This(), a: Block, b: Block) bool { | ||
| 19 | return std.mem.eql(bool, &a, &b); | ||
| 20 | } | ||
| 21 | }, | ||
| 22 | std.hash_map.default_max_load_percentage, | ||
| 23 | ); | ||
| 24 | |||
| 25 | pub fn main() !void { | ||
| 26 | var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); | ||
| 27 | defer arena.deinit(); | ||
| 28 | const allocator = arena.allocator(); | ||
| 29 | |||
| 30 | var blocks_map = BlockMap.init(allocator); | ||
| 31 | defer blocks_map.deinit(); | ||
| 32 | |||
| 33 | var stage1 = std.ArrayList(u16).init(allocator); | ||
| 34 | defer stage1.deinit(); | ||
| 35 | |||
| 36 | var stage2 = std.ArrayList(bool).init(allocator); | ||
| 37 | defer stage2.deinit(); | ||
| 38 | |||
| 39 | var block: Block = [_]bool{false} ** block_size; | ||
| 40 | var block_len: u16 = 0; | ||
| 41 | |||
| 42 | for (0..0x10ffff + 1) |cp| { | ||
| 43 | const isEmoji = emoji.isExtendedPictographic(@intCast(cp)); | ||
| 44 | |||
| 45 | block[block_len] = isEmoji; | ||
| 46 | block_len += 1; | ||
| 47 | |||
| 48 | if (block_len < block_size and cp != 0x10ffff) continue; | ||
| 49 | |||
| 50 | const gop = try blocks_map.getOrPut(block); | ||
| 51 | if (!gop.found_existing) { | ||
| 52 | gop.value_ptr.* = @intCast(stage2.items.len); | ||
| 53 | try stage2.appendSlice(&block); | ||
| 54 | } | ||
| 55 | |||
| 56 | try stage1.append(gop.value_ptr.*); | ||
| 57 | block_len = 0; | ||
| 58 | } | ||
| 59 | |||
| 60 | var args_iter = std.process.args(); | ||
| 61 | _ = args_iter.skip(); | ||
| 62 | const output_path = args_iter.next() orelse @panic("No output file arg!"); | ||
| 63 | |||
| 64 | var out_file = try std.fs.cwd().createFile(output_path, .{}); | ||
| 65 | defer out_file.close(); | ||
| 66 | var out_buf = std.io.bufferedWriter(out_file.writer()); | ||
| 67 | const writer = out_buf.writer(); | ||
| 68 | |||
| 69 | try writer.print("const stage_1 = [{}]u16{{", .{stage1.items.len}); | ||
| 70 | for (stage1.items) |v| { | ||
| 71 | _ = try writer.print("{},", .{v}); | ||
| 72 | } | ||
| 73 | try writer.writeAll("};\n"); | ||
| 74 | |||
| 75 | try writer.print("const stage_2 = [{}]bool{{", .{stage2.items.len}); | ||
| 76 | for (stage2.items) |v| { | ||
| 77 | _ = try writer.print("{},", .{v}); | ||
| 78 | } | ||
| 79 | try writer.writeAll("};\n"); | ||
| 80 | |||
| 81 | const code = | ||
| 82 | \\pub inline fn isExtendedPictographic(cp: u21) bool { | ||
| 83 | \\ const stage_1_index = cp >> 8; | ||
| 84 | \\ const stage_2_index = stage_1[stage_1_index] + (cp & 0xff); | ||
| 85 | \\ return stage_2[stage_2_index]; | ||
| 86 | \\} | ||
| 87 | \\ | ||
| 88 | ; | ||
| 89 | |||
| 90 | try writer.writeAll(code); | ||
| 91 | |||
| 92 | try out_buf.flush(); | ||
| 93 | } | ||