summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Sam Atman, 2026-02-05 08:01:48 -0500
committer: Sam Atman, 2026-02-05 08:01:48 -0500
commit: e485c04ff794a30d21c4a77cccda52b581e95881 (patch)
tree: 51e472a83c138312436c1e436b57394c41fbae14
parent: Merge remote-tracking branch 'jacob/emoji' into no-allocation (diff)
download: zg-e485c04ff794a30d21c4a77cccda52b581e95881.tar.gz
zg-e485c04ff794a30d21c4a77cccda52b581e95881.tar.xz
zg-e485c04ff794a30d21c4a77cccda52b581e95881.zip
De-allocate Emoji module
-rw-r--r-- build.zig 13
-rw-r--r-- codegen/emoji.zig 56
-rw-r--r-- src/Emoji.zig 130
3 files changed, 86 insertions, 113 deletions
diff --git a/build.zig b/build.zig
index 6ba73d9..ee2a6ec 100644
--- a/build.zig
+++ b/build.zig
@@ -57,12 +57,15 @@ pub fn build(b: *std.Build) void {
57 // Emoji 57 // Emoji
58 const emoji_gen_exe = b.addExecutable(.{ 58 const emoji_gen_exe = b.addExecutable(.{
59 .name = "emoji", 59 .name = "emoji",
60 .root_source_file = b.path("codegen/emoji.zig"), 60 .root_module = b.createModule(.{
61 .target = b.graph.host, 61 .root_source_file = b.path("codegen/emoji.zig"),
62 .optimize = .Debug, 62 .target = b.graph.host,
63 .optimize = .Debug,
64 }),
63 }); 65 });
66 emoji_gen_exe.root_module.addAnonymousImport("emoji-data.txt", .{ .root_source_file = b.path("data/unicode/emoji/emoji-data.txt") });
64 const run_emoji_gen_exe = b.addRunArtifact(emoji_gen_exe); 67 const run_emoji_gen_exe = b.addRunArtifact(emoji_gen_exe);
65 const emoji_gen_out = run_emoji_gen_exe.addOutputFileArg("emoji.bin.z"); 68 const emoji_gen_out = run_emoji_gen_exe.addOutputFileArg("emoji.zig");
66 69
67 const wbp_gen_exe = b.addExecutable(.{ 70 const wbp_gen_exe = b.addExecutable(.{
68 .name = "wbp", 71 .name = "wbp",
@@ -283,8 +286,6 @@ pub fn build(b: *std.Build) void {
283 const emoji_t = b.addTest(.{ 286 const emoji_t = b.addTest(.{
284 .name = "Emoji", 287 .name = "Emoji",
285 .root_module = emoji, 288 .root_module = emoji,
286 .target = target,
287 .optimize = optimize,
288 }); 289 });
289 const emoji_tr = b.addRunArtifact(emoji_t); 290 const emoji_tr = b.addRunArtifact(emoji_t);
290 291
diff --git a/codegen/emoji.zig b/codegen/emoji.zig
index 0a4dbe6..c44c483 100644
--- a/codegen/emoji.zig
+++ b/codegen/emoji.zig
@@ -44,21 +44,17 @@ pub fn main() !void {
44 var emoji_map = std.AutoHashMap(u21, Emoji).init(allocator); 44 var emoji_map = std.AutoHashMap(u21, Emoji).init(allocator);
45 defer emoji_map.deinit(); 45 defer emoji_map.deinit();
46 46
47 var line_buf: [4096]u8 = undefined;
48
49 // Process Emoji 47 // Process Emoji
50 var in_file = try std.fs.cwd().openFile("data/unicode/emoji/emoji-data.txt", .{});
51 defer in_file.close();
52 var in_buf = std.io.bufferedReader(in_file.reader());
53 const in_reader = in_buf.reader();
54 48
55 while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { 49 var @"emo-reader" = std.io.Reader.fixed(@embedFile("emoji-data.txt"));
56 if (line.len == 0 or line[0] == '#') continue; 50 var count: usize = 0; // XXX: remove
51 while (@"emo-reader".takeDelimiterInclusive('\n')) |line| {
52 count += 1;
53 if (line.len <= 1 or line[0] == '#') continue;
57 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; 54 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
58 55
59 var field_iter = std.mem.tokenizeAny(u8, no_comment, "; "); 56 var field_iter = std.mem.tokenizeAny(u8, no_comment, "; ");
60 var current_code: [2]u21 = undefined; 57 var current_code: [2]u21 = undefined;
61
62 var i: usize = 0; 58 var i: usize = 0;
63 while (field_iter.next()) |field| : (i += 1) { 59 while (field_iter.next()) |field| : (i += 1) {
64 switch (i) { 60 switch (i) {
@@ -91,15 +87,20 @@ pub fn main() !void {
91 else => {}, 87 else => {},
92 } 88 }
93 } 89 }
90 } else |err| switch (err) {
91 error.EndOfStream => {},
92 else => {
93 return err;
94 },
94 } 95 }
95 96
96 var blocks_map = BlockMap.init(allocator); 97 var blocks_map = BlockMap.init(allocator);
97 defer blocks_map.deinit(); 98 defer blocks_map.deinit();
98 99
99 var stage1 = std.ArrayList(u16).init(allocator); 100 var stage1 = std.array_list.Managed(u16).init(allocator);
100 defer stage1.deinit(); 101 defer stage1.deinit();
101 102
102 var stage2 = std.ArrayList(u6).init(allocator); 103 var stage2 = std.array_list.Managed(u6).init(allocator);
103 defer stage2.deinit(); 104 defer stage2.deinit();
104 105
105 var block: Block = [_]u6{0} ** block_size; 106 var block: Block = [_]u6{0} ** block_size;
@@ -129,18 +130,31 @@ pub fn main() !void {
129 _ = args_iter.skip(); 130 _ = args_iter.skip();
130 const output_path = args_iter.next() orelse @panic("No output file arg!"); 131 const output_path = args_iter.next() orelse @panic("No output file arg!");
131 132
132 const compressor = std.compress.flate.deflate.compressor; 133 var write_buf: [4096]u8 = undefined;
133 var out_file = try std.fs.cwd().createFile(output_path, .{}); 134 var out_file = try std.fs.cwd().createFile(output_path, .{});
134 defer out_file.close(); 135 defer out_file.close();
135 var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); 136 var writer = out_file.writer(&write_buf);
136 const writer = out_comp.writer(); 137
137 138 try writer.interface.print(
138 const endian = builtin.cpu.arch.endian(); 139 \\//! This file is auto-generated. Do not edit.
139 try writer.writeInt(u16, @intCast(stage1.items.len), endian); 140 \\
140 for (stage1.items) |i| try writer.writeInt(u16, i, endian); 141 \\pub const s1: [{}]u16 = .{{
142 , .{stage1.items.len});
143 for (stage1.items) |entry| try writer.interface.print("{}, ", .{entry});
144
145 try writer.interface.print(
146 \\
147 \\}};
148 \\
149 \\pub const s2: [{}]u6 = .{{
150 , .{stage2.items.len});
151 for (stage2.items) |entry| {
152 try writer.interface.print("{}, ", .{entry});
153 }
141 154
142 try writer.writeInt(u16, @intCast(stage2.items.len), endian); 155 try writer.interface.writeAll(
143 for (stage2.items) |i| try writer.writeInt(u8, i, endian); 156 \\};
157 );
144 158
145 try out_comp.flush(); 159 try writer.interface.flush();
146} 160}
diff --git a/src/Emoji.zig b/src/Emoji.zig
index 75b44c2..13f675b 100644
--- a/src/Emoji.zig
+++ b/src/Emoji.zig
@@ -1,17 +1,17 @@
1const std = @import("std"); 1//! Emoji module
2const builtin = @import("builtin");
3const mem = std.mem;
4const Allocator = mem.Allocator;
5const compress = std.compress;
6const unicode = std.unicode;
7
8const CodePoint = @import("code_point").CodePoint;
9const CodePointIterator = @import("code_point").Iterator;
10 2
11s1: []u16 = undefined, 3const Data = struct {
12s2: []u6 = undefined, 4 s1: []const u16 = undefined,
5 s2: []const u6 = undefined,
6};
13 7
14const Emoji = @This(); 8const emoji = emoji: {
9 const data = @import("emoji");
10 break :emoji Data{
11 .s1 = &data.s1,
12 .s2 = &data.s2,
13 };
14};
15 15
16// This must be an exact match of `Emoji` from `codegen/emoji.zig`. 16// This must be an exact match of `Emoji` from `codegen/emoji.zig`.
17pub const Properties = packed struct { 17pub const Properties = packed struct {
@@ -23,110 +23,68 @@ pub const Properties = packed struct {
23 Extended_Pictographic: bool = false, 23 Extended_Pictographic: bool = false,
24}; 24};
25 25
26pub fn init(allocator: Allocator) Allocator.Error!Emoji {
27 var emoji = Emoji{};
28 try emoji.setup(allocator);
29 return emoji;
30}
31
32pub fn setup(emoji: *Emoji, allocator: Allocator) Allocator.Error!void {
33 const decompressor = compress.flate.inflate.decompressor;
34 const in_bytes = @embedFile("emoji");
35 var in_fbs = std.io.fixedBufferStream(in_bytes);
36 var in_decomp = decompressor(.raw, in_fbs.reader());
37 var reader = in_decomp.reader();
38
39 const endian = builtin.cpu.arch.endian();
40
41 const s1_len: u16 = reader.readInt(u16, endian) catch unreachable;
42 emoji.s1 = try allocator.alloc(u16, s1_len);
43 errdefer allocator.free(emoji.s1);
44 for (0..s1_len) |i| emoji.s1[i] = reader.readInt(u16, endian) catch unreachable;
45
46 const s2_len: u16 = reader.readInt(u16, endian) catch unreachable;
47 emoji.s2 = try allocator.alloc(u6, s2_len);
48 errdefer allocator.free(emoji.s2);
49 for (0..s2_len) |i| emoji.s2[i] = @intCast(reader.readInt(u8, endian) catch unreachable);
50}
51
52pub fn deinit(emoji: *const Emoji, allocator: Allocator) void {
53 allocator.free(emoji.s1);
54 allocator.free(emoji.s2);
55}
56
57/// Lookup the emoji properties for a code point. 26/// Lookup the emoji properties for a code point.
58fn properties(emoji: Emoji, cp: u21) Properties { 27fn properties(cp: u21) Properties {
59 return @bitCast(emoji.s2[emoji.s1[cp >> 8] + (cp & 0xff)]); 28 return @bitCast(emoji.s2[emoji.s1[cp >> 8] + (cp & 0xff)]);
60} 29}
61 30
62pub fn isEmoji(emoji: Emoji, cp: u21) bool { 31pub fn isEmoji(cp: u21) bool {
63 return properties(emoji, cp).Emoji; 32 return properties(cp).Emoji;
64} 33}
65 34
66pub fn isEmojiPresentation(emoji: Emoji, cp: u21) bool { 35pub fn isEmojiPresentation(cp: u21) bool {
67 return properties(emoji, cp).Emoji_Presentation; 36 return properties(cp).Emoji_Presentation;
68} 37}
69 38
70pub fn isEmojiModifier(emoji: Emoji, cp: u21) bool { 39pub fn isEmojiModifier(cp: u21) bool {
71 return properties(emoji, cp).Emoji_Modifier; 40 return properties(cp).Emoji_Modifier;
72} 41}
73 42
74pub fn isEmojiModifierBase(emoji: Emoji, cp: u21) bool { 43pub fn isEmojiModifierBase(cp: u21) bool {
75 return properties(emoji, cp).Emoji_Modifier_Base; 44 return properties(cp).Emoji_Modifier_Base;
76} 45}
77 46
78pub fn isEmojiComponent(emoji: Emoji, cp: u21) bool { 47pub fn isEmojiComponent(cp: u21) bool {
79 return properties(emoji, cp).Emoji_Component; 48 return properties(cp).Emoji_Component;
80} 49}
81 50
82pub fn isExtendedPictographic(emoji: Emoji, cp: u21) bool { 51pub fn isExtendedPictographic(cp: u21) bool {
83 return properties(emoji, cp).Extended_Pictographic; 52 return properties(cp).Extended_Pictographic;
84} 53}
85 54
86test "isEmoji" { 55test "isEmoji" {
87 const emoji = try Emoji.init(std.testing.allocator); 56 try std.testing.expect(isEmoji(0x1F415)); // 🐕
88 defer emoji.deinit(std.testing.allocator); 57 try std.testing.expect(!isEmoji(0x3042)); // あ
89
90 try std.testing.expect(emoji.isEmoji(0x1F415)); // 🐕
91 try std.testing.expect(!emoji.isEmoji(0x3042)); // あ
92} 58}
93 59
94test "isEmojiPresentation" { 60test "isEmojiPresentation" {
95 const emoji = try Emoji.init(std.testing.allocator); 61 try std.testing.expect(isEmojiPresentation(0x1F408)); // 🐈
96 defer emoji.deinit(std.testing.allocator); 62 try std.testing.expect(!isEmojiPresentation(0x267E)); // ♾
97
98 try std.testing.expect(emoji.isEmojiPresentation(0x1F408)); // 🐈
99 try std.testing.expect(!emoji.isEmojiPresentation(0x267E)); // ♾
100} 63}
101 64
102test "isEmojiModifier" { 65test "isEmojiModifier" {
103 const emoji = try Emoji.init(std.testing.allocator); 66 try std.testing.expect(isEmojiModifier(0x1F3FF)); // 🏿
104 defer emoji.deinit(std.testing.allocator); 67 try std.testing.expect(!isEmojiModifier(0x1F385)); // 🎅
105
106 try std.testing.expect(emoji.isEmojiModifier(0x1F3FF)); // 🏿
107 try std.testing.expect(!emoji.isEmojiModifier(0x1F385)); // 🎅
108} 68}
109 69
110test "isEmojiModifierBase" { 70test "isEmojiModifierBase" {
111 const emoji = try Emoji.init(std.testing.allocator); 71 try std.testing.expect(isEmojiModifierBase(0x1F977)); // 🥷
112 defer emoji.deinit(std.testing.allocator); 72 try std.testing.expect(!isEmojiModifierBase(0x1F4F8)); // 📸
113
114 try std.testing.expect(emoji.isEmojiModifierBase(0x1F977)); // 🥷
115 try std.testing.expect(!emoji.isEmojiModifierBase(0x1F4F8)); // 📸
116} 73}
117 74
118test "isEmojiComponent" { 75test "isEmojiComponent" {
119 const emoji = try Emoji.init(std.testing.allocator); 76 try std.testing.expect(isEmojiComponent(0x1F9B0)); // 🦰
120 defer emoji.deinit(std.testing.allocator); 77 try std.testing.expect(!isEmojiComponent(0x1F9B5)); // 🦵
121
122 try std.testing.expect(emoji.isEmojiComponent(0x1F9B0)); // 🦰
123 try std.testing.expect(!emoji.isEmojiComponent(0x1F9B5)); // 🦵
124} 78}
125 79
126test "isExtendedPictographic" { 80test "isExtendedPictographic" {
127 const emoji = try Emoji.init(std.testing.allocator); 81 try std.testing.expect(isExtendedPictographic(0x1F005)); // 🀅
128 defer emoji.deinit(std.testing.allocator); 82 try std.testing.expect(!isExtendedPictographic(0x2A)); // *
129
130 try std.testing.expect(emoji.isExtendedPictographic(0x1F005)); // 🀅
131 try std.testing.expect(!emoji.isExtendedPictographic(0x2A)); // *
132} 83}
84
85const std = @import("std");
86const builtin = @import("builtin");
87const unicode = std.unicode;
88
89const CodePoint = @import("code_point").CodePoint;
90const CodePointIterator = @import("code_point").Iterator;