summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Jacob Sandlund2025-06-24 07:55:16 -0400
committerGravatar Jacob Sandlund2025-06-24 07:55:16 -0400
commitd7df2b4b92e198fbdbe5cfc29095d48980675004 (patch)
tree3a6a8afa88dfcbee6b5733769604060d0f95b7b9
parentMerge pull request 'Fix benchmarks' (#56) from jacobsandlund/zg:benchmarks in... (diff)
downloadzg-d7df2b4b92e198fbdbe5cfc29095d48980675004.tar.gz
zg-d7df2b4b92e198fbdbe5cfc29095d48980675004.tar.xz
zg-d7df2b4b92e198fbdbe5cfc29095d48980675004.zip
Add Emoji module and codegen/emoji
-rw-r--r--README.md28
-rw-r--r--build.zig27
-rw-r--r--codegen/emoji.zig146
-rw-r--r--src/Emoji.zig132
4 files changed, 333 insertions, 0 deletions
diff --git a/README.md b/README.md
index bfa8d5e..5912ce4 100644
--- a/README.md
+++ b/README.md
@@ -520,6 +520,34 @@ test "Scripts" {
520} 520}
521``` 521```
522 522
523## Emoji
524
525To get information about emoji and emoji-like characters, use the `Emoji` module.
526
527In your `build.zig`:
528
529```zig
530exe.root_module.addImport("Emoji", zg.module("Emoji"));
531```
532
533In your code:
534
535```zig
536const Emoji = @import("Emoji");
537
538test "Emoji" {
539 const emoji = try Emoji.init(allocator);
540 defer emoji.deinit(allocator);
541
542 try expect(emoji.isEmoji(0x1F415)); // 🐕
543 try expect(emoji.isEmojiPresentation(0x1F408)); // 🐈
544 try expect(emoji.isEmojiModifier(0x1F3FF)); // 🏿
545 try expect(emoji.isEmojiModifierBase(0x1F977)); // 🥷
546 try expect(emoji.isEmojiComponent(0x1F9B0)); // 🦰
547 try expect(emoji.isExtendedPictographic(0x1F005)); // 🀅
548}
549```
550
523## Relation to Ziglyph 551## Relation to Ziglyph
524 552
525zg is a total re-write of some of the components of Ziglyph. The idea was to 553zg is a total re-write of some of the components of Ziglyph. The idea was to
diff --git a/build.zig b/build.zig
index 58fd3e7..839cf13 100644
--- a/build.zig
+++ b/build.zig
@@ -22,6 +22,16 @@ pub fn build(b: *std.Build) void {
22 const run_gbp_gen_exe = b.addRunArtifact(gbp_gen_exe); 22 const run_gbp_gen_exe = b.addRunArtifact(gbp_gen_exe);
23 const gbp_gen_out = run_gbp_gen_exe.addOutputFileArg("gbp.bin.z"); 23 const gbp_gen_out = run_gbp_gen_exe.addOutputFileArg("gbp.bin.z");
24 24
25 // Emoji
26 const emoji_gen_exe = b.addExecutable(.{
27 .name = "emoji",
28 .root_source_file = b.path("codegen/emoji.zig"),
29 .target = b.graph.host,
30 .optimize = .Debug,
31 });
32 const run_emoji_gen_exe = b.addRunArtifact(emoji_gen_exe);
33 const emoji_gen_out = run_emoji_gen_exe.addOutputFileArg("emoji.bin.z");
34
25 // Display width 35 // Display width
26 const cjk = b.option(bool, "cjk", "Ambiguous code points are wide (display width: 2).") orelse false; 36 const cjk = b.option(bool, "cjk", "Ambiguous code points are wide (display width: 2).") orelse false;
27 const options = b.addOptions(); 37 const options = b.addOptions();
@@ -215,6 +225,22 @@ pub fn build(b: *std.Build) void {
215 }); 225 });
216 const grapheme_tr = b.addRunArtifact(grapheme_t); 226 const grapheme_tr = b.addRunArtifact(grapheme_t);
217 227
228 // Emoji module
229 const emoji = b.addModule("Emoji", .{
230 .root_source_file = b.path("src/Emoji.zig"),
231 .target = target,
232 .optimize = optimize,
233 });
234 emoji.addAnonymousImport("emoji", .{ .root_source_file = emoji_gen_out });
235
236 const emoji_t = b.addTest(.{
237 .name = "Emoji",
238 .root_module = emoji,
239 .target = target,
240 .optimize = optimize,
241 });
242 const emoji_tr = b.addRunArtifact(emoji_t);
243
218 // ASCII utilities 244 // ASCII utilities
219 const ascii = b.addModule("ascii", .{ 245 const ascii = b.addModule("ascii", .{
220 .root_source_file = b.path("src/ascii.zig"), 246 .root_source_file = b.path("src/ascii.zig"),
@@ -452,6 +478,7 @@ pub fn build(b: *std.Build) void {
452 test_step.dependOn(&code_point_tr.step); 478 test_step.dependOn(&code_point_tr.step);
453 test_step.dependOn(&display_width_tr.step); 479 test_step.dependOn(&display_width_tr.step);
454 test_step.dependOn(&grapheme_tr.step); 480 test_step.dependOn(&grapheme_tr.step);
481 test_step.dependOn(&emoji_tr.step);
455 test_step.dependOn(&ascii_tr.step); 482 test_step.dependOn(&ascii_tr.step);
456 test_step.dependOn(&ccc_data_tr.step); 483 test_step.dependOn(&ccc_data_tr.step);
457 test_step.dependOn(&canon_data_tr.step); 484 test_step.dependOn(&canon_data_tr.step);
diff --git a/codegen/emoji.zig b/codegen/emoji.zig
new file mode 100644
index 0000000..0a4dbe6
--- /dev/null
+++ b/codegen/emoji.zig
@@ -0,0 +1,146 @@
1const std = @import("std");
2const builtin = @import("builtin");
3
/// Per-code-point emoji property flags, one bit per property.
///
/// Field names are matched verbatim against the property column of
/// `emoji-data.txt` (via `std.meta.stringToEnum` in `main`), so they must keep
/// the Unicode spelling. The explicit `u6` backing integer makes the compiler
/// reject any field change that would alter the bit width the generated table
/// relies on.
pub const Emoji = packed struct(u6) {
    Emoji: bool = false,
    Emoji_Presentation: bool = false,
    Emoji_Modifier: bool = false,
    Emoji_Modifier_Base: bool = false,
    Emoji_Component: bool = false,
    Extended_Pictographic: bool = false,
};
12
/// Number of consecutive code points stored in one stage-2 block.
const block_size = 256;

/// One block of property bits, stored as raw `u6` values (one per code point).
const Block = [block_size]u6;

// `Emoji` values are bit-cast into `Block` entries, so both must be equally wide.
comptime {
    if (@bitSizeOf(Emoji) != @bitSizeOf(u6))
        @compileError("Emoji doesn't have expected bit size.");
}
21
/// Hash map keyed by the full contents of a `Block`, mapping to a `u16`.
/// `main` stores each distinct block's starting offset in stage 2 here so
/// identical blocks are emitted only once.
const BlockMap = std.HashMap(
    Block,
    u16,
    struct {
        const Context = @This();

        /// Hashes every element of the block with Wyhash (seed 0).
        pub fn hash(_: Context, key: Block) u64 {
            var state = std.hash.Wyhash.init(0);
            std.hash.autoHashStrat(&state, key, .DeepRecursive);
            return state.final();
        }

        /// Two blocks are equal when all of their elements are equal.
        pub fn eql(_: Context, lhs: Block, rhs: Block) bool {
            return std.mem.eql(u6, &lhs, &rhs);
        }
    },
    std.hash_map.default_max_load_percentage,
);
38
/// Generates the compressed two-stage emoji property table.
///
/// Reads `data/unicode/emoji/emoji-data.txt` relative to the current working
/// directory, collects the properties of every listed code point, splits the
/// code point range `0..0x110000` into blocks of `block_size` entries,
/// de-duplicates identical blocks, and writes the result raw-deflated to the
/// path given as the first command-line argument.
///
/// Output layout (before compression):
///   u16 stage-1 length, then that many u16 stage-2 offsets (one per block),
///   u16 stage-2 length, then that many bytes (one `Emoji` bit set each).
///
/// Errors: any file, parse, or allocation error is returned;
/// `error.InvalidProp` is returned for a property name that is not a field of
/// `Emoji`. Panics if no output path argument is supplied.
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    var emoji_map = std.AutoHashMap(u21, Emoji).init(allocator);
    defer emoji_map.deinit();

    var line_buf: [4096]u8 = undefined;

    // Process Emoji
    var in_file = try std.fs.cwd().openFile("data/unicode/emoji/emoji-data.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |raw_line| {
        // Strip the trailing comment first, then surrounding whitespace. The
        // trim also removes a '\r' left behind by CRLF line endings, which
        // would otherwise reach `parseInt` on blank lines and abort the run.
        const no_comment = if (std.mem.indexOfScalar(u8, raw_line, '#')) |octo| raw_line[0..octo] else raw_line;
        const line = std.mem.trim(u8, no_comment, " \t\r");
        if (line.len == 0) continue;

        var field_iter = std.mem.tokenizeAny(u8, line, "; ");
        // Inclusive [first, last] range; set by field 0 before field 1 reads it.
        var current_code: [2]u21 = undefined;

        var i: usize = 0;
        while (field_iter.next()) |field| : (i += 1) {
            switch (i) {
                0 => {
                    // Code point(s): either `XXXX` or `XXXX..YYYY` in hex.
                    if (std.mem.indexOf(u8, field, "..")) |dots| {
                        current_code = .{
                            try std.fmt.parseInt(u21, field[0..dots], 16),
                            try std.fmt.parseInt(u21, field[dots + 2 ..], 16),
                        };
                    } else {
                        const code = try std.fmt.parseInt(u21, field, 16);
                        current_code = .{ code, code };
                    }
                },
                1 => {
                    // Prop: must name a field of `Emoji` exactly.
                    const prop = std.meta.stringToEnum(std.meta.FieldEnum(Emoji), field) orelse return error.InvalidProp;
                    for (current_code[0]..current_code[1] + 1) |code| {
                        const cp: u21 = @intCast(code);
                        const gop = try emoji_map.getOrPut(cp);
                        if (!gop.found_existing) gop.value_ptr.* = .{};
                        switch (prop) {
                            // Turn the runtime tag into a comptime field name.
                            inline else => |tag| {
                                @field(gop.value_ptr.*, @tagName(tag)) = true;
                            },
                        }
                    }
                },
                else => {},
            }
        }
    }

    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();

    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();

    var stage2 = std.ArrayList(u6).init(allocator);
    defer stage2.deinit();

    var block: Block = [_]u6{0} ** block_size;
    var block_len: u16 = 0;

    for (0..0x110000) |i| {
        const cp: u21 = @intCast(i);
        const emoji = emoji_map.get(cp) orelse Emoji{};

        block[block_len] = @bitCast(emoji);
        block_len += 1;

        // Keep filling until the block is complete (or the last code point).
        if (block_len < block_size and cp != 0x10ffff) continue;

        // Only blocks not seen before are appended to stage 2; stage 1 always
        // records the offset of the (possibly shared) block.
        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }

        try stage1.append(gop.value_ptr.*);
        block_len = 0;
    }

    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    const compressor = std.compress.flate.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best });
    const writer = out_comp.writer();

    // NOTE(review): integers are written in the byte order of the machine
    // running this generator, while src/Emoji.zig reads them in the target's
    // byte order — confirm cross-endian builds are not expected.
    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |i| try writer.writeInt(u16, i, endian);

    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    for (stage2.items) |i| try writer.writeInt(u8, i, endian);

    try out_comp.flush();
}
diff --git a/src/Emoji.zig b/src/Emoji.zig
new file mode 100644
index 0000000..bf7014d
--- /dev/null
+++ b/src/Emoji.zig
@@ -0,0 +1,132 @@
1const std = @import("std");
2const builtin = @import("builtin");
3const mem = std.mem;
4const Allocator = mem.Allocator;
5const compress = std.compress;
6const unicode = std.unicode;
7
8const CodePoint = @import("code_point").CodePoint;
9const CodePointIterator = @import("code_point").Iterator;
10
const Emoji = @This();

/// Stage-1 table: indexed by `cp >> 8`, each entry is the offset in `s2` of
/// that code point block. Undefined until `setup` has run.
s1: []u16 = undefined,
/// Stage-2 table: property bits, bit-cast to `Properties` on lookup.
/// Undefined until `setup` has run.
s2: []u6 = undefined,
15
/// Emoji property flags of a single code point, one bit per property.
///
/// This must be an exact match of `Emoji` from `codegen/emoji.zig`: stage-2
/// table entries are bit-cast to this type. The explicit `u6` backing integer
/// makes the compiler reject any field change that would alter that width.
pub const Properties = packed struct(u6) {
    Emoji: bool = false,
    Emoji_Presentation: bool = false,
    Emoji_Modifier: bool = false,
    Emoji_Modifier_Base: bool = false,
    Emoji_Component: bool = false,
    Extended_Pictographic: bool = false,
};
25
/// Creates an `Emoji` whose lookup tables have been loaded by `setup`.
///
/// The tables are allocated with `allocator`; release them with `deinit`,
/// passing the same allocator. Returns `error.OutOfMemory` if allocation fails.
pub fn init(allocator: Allocator) Allocator.Error!Emoji {
    var self: Emoji = .{};
    try self.setup(allocator);
    return self;
}
31
/// Fills `emoji.s1` and `emoji.s2` by inflating the embedded `emoji` data.
///
/// The decompressed stream is read as a `u16` count followed by that many
/// `u16` stage-1 entries, then a second `u16` count followed by that many
/// bytes, each narrowed to a `u6` stage-2 entry. Integers are read in the
/// target CPU's byte order. Read failures are treated as `unreachable`
/// because the data is embedded at compile time.
///
/// Returns `error.OutOfMemory` if an allocation fails; a table already
/// allocated by this call is freed before returning.
pub fn setup(emoji: *Emoji, allocator: Allocator) Allocator.Error!void {
    var stream = std.io.fixedBufferStream(@embedFile("emoji"));
    var inflater = compress.flate.inflate.decompressor(.raw, stream.reader());
    var reader = inflater.reader();

    const endian = builtin.cpu.arch.endian();

    const stage1_len: u16 = reader.readInt(u16, endian) catch unreachable;
    emoji.s1 = try allocator.alloc(u16, stage1_len);
    errdefer allocator.free(emoji.s1);
    for (emoji.s1) |*entry| {
        entry.* = reader.readInt(u16, endian) catch unreachable;
    }

    const stage2_len: u16 = reader.readInt(u16, endian) catch unreachable;
    emoji.s2 = try allocator.alloc(u6, stage2_len);
    errdefer allocator.free(emoji.s2);
    for (emoji.s2) |*entry| {
        const byte = reader.readInt(u8, endian) catch unreachable;
        entry.* = @intCast(byte);
    }
}
51
/// Frees both lookup tables. `allocator` must be the one the tables were
/// allocated with in `init`/`setup`.
pub fn deinit(emoji: *const Emoji, allocator: Allocator) void {
    const tables = emoji.*;
    allocator.free(tables.s1);
    allocator.free(tables.s2);
}
56
/// Lookup the emoji properties for a code point.
///
/// `s1` maps the code point's 256-entry block (`cp >> 8`) to that block's
/// offset in `s2`; the low 8 bits of `cp` select the entry inside the block.
/// A `u21` can hold values whose block lies beyond the end of `s1`; for those
/// an all-false `Properties` is returned instead of indexing out of bounds.
fn properties(emoji: Emoji, cp: u21) Properties {
    const block_index = cp >> 8;
    if (block_index >= emoji.s1.len) return .{};
    const block_offset: usize = emoji.s1[block_index];
    return @bitCast(emoji.s2[block_offset + (cp & 0xff)]);
}
61
/// Returns whether the `Emoji` flag is set for `cp` in the lookup table.
pub fn isEmoji(emoji: Emoji, cp: u21) bool {
    const props = emoji.properties(cp);
    return props.Emoji;
}
65
/// Returns whether the `Emoji_Presentation` flag is set for `cp` in the lookup table.
pub fn isEmojiPresentation(emoji: Emoji, cp: u21) bool {
    const props = emoji.properties(cp);
    return props.Emoji_Presentation;
}
69
/// Returns whether the `Emoji_Modifier` flag is set for `cp` in the lookup table.
pub fn isEmojiModifier(emoji: Emoji, cp: u21) bool {
    const props = emoji.properties(cp);
    return props.Emoji_Modifier;
}
73
/// Returns whether the `Emoji_Modifier_Base` flag is set for `cp` in the lookup table.
pub fn isEmojiModifierBase(emoji: Emoji, cp: u21) bool {
    const props = emoji.properties(cp);
    return props.Emoji_Modifier_Base;
}
77
/// Returns whether the `Emoji_Component` flag is set for `cp` in the lookup table.
pub fn isEmojiComponent(emoji: Emoji, cp: u21) bool {
    const props = emoji.properties(cp);
    return props.Emoji_Component;
}
81
/// Returns whether the `Extended_Pictographic` flag is set for `cp` in the lookup table.
pub fn isExtendedPictographic(emoji: Emoji, cp: u21) bool {
    const props = emoji.properties(cp);
    return props.Extended_Pictographic;
}
85
test "isEmoji" {
    const gpa = std.testing.allocator;
    const table = try Emoji.init(gpa);
    defer table.deinit(gpa);

    const dog: u21 = 0x1F415; // 🐕
    const hiragana_a: u21 = 0x3042; // あ
    try std.testing.expect(table.isEmoji(dog));
    try std.testing.expect(!table.isEmoji(hiragana_a));
}
93
test "isEmojiPresentation" {
    const gpa = std.testing.allocator;
    const table = try Emoji.init(gpa);
    defer table.deinit(gpa);

    const cat: u21 = 0x1F408; // 🐈
    const infinity: u21 = 0x267E; // ♾️
    try std.testing.expect(table.isEmojiPresentation(cat));
    try std.testing.expect(!table.isEmojiPresentation(infinity));
}
101
test "isEmojiModifier" {
    const gpa = std.testing.allocator;
    const table = try Emoji.init(gpa);
    defer table.deinit(gpa);

    const dark_skin_tone: u21 = 0x1F3FF; // 🏿
    const santa: u21 = 0x1F385; // 🎅
    try std.testing.expect(table.isEmojiModifier(dark_skin_tone));
    try std.testing.expect(!table.isEmojiModifier(santa));
}
109
test "isEmojiModifierBase" {
    const gpa = std.testing.allocator;
    const table = try Emoji.init(gpa);
    defer table.deinit(gpa);

    const ninja: u21 = 0x1F977; // 🥷
    const camera_with_flash: u21 = 0x1F4F8; // 📸
    try std.testing.expect(table.isEmojiModifierBase(ninja));
    try std.testing.expect(!table.isEmojiModifierBase(camera_with_flash));
}
117
test "isEmojiComponent" {
    const gpa = std.testing.allocator;
    const table = try Emoji.init(gpa);
    defer table.deinit(gpa);

    const red_hair: u21 = 0x1F9B0; // 🦰
    const leg: u21 = 0x1F9B5; // 🦵
    try std.testing.expect(table.isEmojiComponent(red_hair));
    try std.testing.expect(!table.isEmojiComponent(leg));
}
125
test "isExtendedPictographic" {
    const gpa = std.testing.allocator;
    const table = try Emoji.init(gpa);
    defer table.deinit(gpa);

    const mahjong_green_dragon: u21 = 0x1F005; // 🀅
    const asterisk: u21 = 0x2A; // *
    try std.testing.expect(table.isExtendedPictographic(mahjong_green_dragon));
    try std.testing.expect(!table.isExtendedPictographic(asterisk));
}