diff options
| author | 2026-02-05 08:01:48 -0500 | |
|---|---|---|
| committer | 2026-02-05 08:01:48 -0500 | |
| commit | e485c04ff794a30d21c4a77cccda52b581e95881 (patch) | |
| tree | 51e472a83c138312436c1e436b57394c41fbae14 | |
| parent | Merge remote-tracking branch 'jacob/emoji' into no-allocation (diff) | |
| download | zg-e485c04ff794a30d21c4a77cccda52b581e95881.tar.gz zg-e485c04ff794a30d21c4a77cccda52b581e95881.tar.xz zg-e485c04ff794a30d21c4a77cccda52b581e95881.zip | |
De-allocate Emoji module
| -rw-r--r-- | build.zig | 13 | ||||
| -rw-r--r-- | codegen/emoji.zig | 56 | ||||
| -rw-r--r-- | src/Emoji.zig | 130 |
3 files changed, 86 insertions, 113 deletions
| @@ -57,12 +57,15 @@ pub fn build(b: *std.Build) void { | |||
| 57 | // Emoji | 57 | // Emoji |
| 58 | const emoji_gen_exe = b.addExecutable(.{ | 58 | const emoji_gen_exe = b.addExecutable(.{ |
| 59 | .name = "emoji", | 59 | .name = "emoji", |
| 60 | .root_source_file = b.path("codegen/emoji.zig"), | 60 | .root_module = b.createModule(.{ |
| 61 | .target = b.graph.host, | 61 | .root_source_file = b.path("codegen/emoji.zig"), |
| 62 | .optimize = .Debug, | 62 | .target = b.graph.host, |
| 63 | .optimize = .Debug, | ||
| 64 | }), | ||
| 63 | }); | 65 | }); |
| 66 | emoji_gen_exe.root_module.addAnonymousImport("emoji-data.txt", .{ .root_source_file = b.path("data/unicode/emoji/emoji-data.txt") }); | ||
| 64 | const run_emoji_gen_exe = b.addRunArtifact(emoji_gen_exe); | 67 | const run_emoji_gen_exe = b.addRunArtifact(emoji_gen_exe); |
| 65 | const emoji_gen_out = run_emoji_gen_exe.addOutputFileArg("emoji.bin.z"); | 68 | const emoji_gen_out = run_emoji_gen_exe.addOutputFileArg("emoji.zig"); |
| 66 | 69 | ||
| 67 | const wbp_gen_exe = b.addExecutable(.{ | 70 | const wbp_gen_exe = b.addExecutable(.{ |
| 68 | .name = "wbp", | 71 | .name = "wbp", |
| @@ -283,8 +286,6 @@ pub fn build(b: *std.Build) void { | |||
| 283 | const emoji_t = b.addTest(.{ | 286 | const emoji_t = b.addTest(.{ |
| 284 | .name = "Emoji", | 287 | .name = "Emoji", |
| 285 | .root_module = emoji, | 288 | .root_module = emoji, |
| 286 | .target = target, | ||
| 287 | .optimize = optimize, | ||
| 288 | }); | 289 | }); |
| 289 | const emoji_tr = b.addRunArtifact(emoji_t); | 290 | const emoji_tr = b.addRunArtifact(emoji_t); |
| 290 | 291 | ||
diff --git a/codegen/emoji.zig b/codegen/emoji.zig index 0a4dbe6..c44c483 100644 --- a/codegen/emoji.zig +++ b/codegen/emoji.zig | |||
| @@ -44,21 +44,17 @@ pub fn main() !void { | |||
| 44 | var emoji_map = std.AutoHashMap(u21, Emoji).init(allocator); | 44 | var emoji_map = std.AutoHashMap(u21, Emoji).init(allocator); |
| 45 | defer emoji_map.deinit(); | 45 | defer emoji_map.deinit(); |
| 46 | 46 | ||
| 47 | var line_buf: [4096]u8 = undefined; | ||
| 48 | |||
| 49 | // Process Emoji | 47 | // Process Emoji |
| 50 | var in_file = try std.fs.cwd().openFile("data/unicode/emoji/emoji-data.txt", .{}); | ||
| 51 | defer in_file.close(); | ||
| 52 | var in_buf = std.io.bufferedReader(in_file.reader()); | ||
| 53 | const in_reader = in_buf.reader(); | ||
| 54 | 48 | ||
| 55 | while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { | 49 | var @"emo-reader" = std.io.Reader.fixed(@embedFile("emoji-data.txt")); |
| 56 | if (line.len == 0 or line[0] == '#') continue; | 50 | var count: usize = 0; // XXX: remove |
| 51 | while (@"emo-reader".takeDelimiterInclusive('\n')) |line| { | ||
| 52 | count += 1; | ||
| 53 | if (line.len <= 1 or line[0] == '#') continue; | ||
| 57 | const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; | 54 | const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; |
| 58 | 55 | ||
| 59 | var field_iter = std.mem.tokenizeAny(u8, no_comment, "; "); | 56 | var field_iter = std.mem.tokenizeAny(u8, no_comment, "; "); |
| 60 | var current_code: [2]u21 = undefined; | 57 | var current_code: [2]u21 = undefined; |
| 61 | |||
| 62 | var i: usize = 0; | 58 | var i: usize = 0; |
| 63 | while (field_iter.next()) |field| : (i += 1) { | 59 | while (field_iter.next()) |field| : (i += 1) { |
| 64 | switch (i) { | 60 | switch (i) { |
| @@ -91,15 +87,20 @@ pub fn main() !void { | |||
| 91 | else => {}, | 87 | else => {}, |
| 92 | } | 88 | } |
| 93 | } | 89 | } |
| 90 | } else |err| switch (err) { | ||
| 91 | error.EndOfStream => {}, | ||
| 92 | else => { | ||
| 93 | return err; | ||
| 94 | }, | ||
| 94 | } | 95 | } |
| 95 | 96 | ||
| 96 | var blocks_map = BlockMap.init(allocator); | 97 | var blocks_map = BlockMap.init(allocator); |
| 97 | defer blocks_map.deinit(); | 98 | defer blocks_map.deinit(); |
| 98 | 99 | ||
| 99 | var stage1 = std.ArrayList(u16).init(allocator); | 100 | var stage1 = std.array_list.Managed(u16).init(allocator); |
| 100 | defer stage1.deinit(); | 101 | defer stage1.deinit(); |
| 101 | 102 | ||
| 102 | var stage2 = std.ArrayList(u6).init(allocator); | 103 | var stage2 = std.array_list.Managed(u6).init(allocator); |
| 103 | defer stage2.deinit(); | 104 | defer stage2.deinit(); |
| 104 | 105 | ||
| 105 | var block: Block = [_]u6{0} ** block_size; | 106 | var block: Block = [_]u6{0} ** block_size; |
| @@ -129,18 +130,31 @@ pub fn main() !void { | |||
| 129 | _ = args_iter.skip(); | 130 | _ = args_iter.skip(); |
| 130 | const output_path = args_iter.next() orelse @panic("No output file arg!"); | 131 | const output_path = args_iter.next() orelse @panic("No output file arg!"); |
| 131 | 132 | ||
| 132 | const compressor = std.compress.flate.deflate.compressor; | 133 | var write_buf: [4096]u8 = undefined; |
| 133 | var out_file = try std.fs.cwd().createFile(output_path, .{}); | 134 | var out_file = try std.fs.cwd().createFile(output_path, .{}); |
| 134 | defer out_file.close(); | 135 | defer out_file.close(); |
| 135 | var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); | 136 | var writer = out_file.writer(&write_buf); |
| 136 | const writer = out_comp.writer(); | 137 | |
| 137 | 138 | try writer.interface.print( | |
| 138 | const endian = builtin.cpu.arch.endian(); | 139 | \\//! This file is auto-generated. Do not edit. |
| 139 | try writer.writeInt(u16, @intCast(stage1.items.len), endian); | 140 | \\ |
| 140 | for (stage1.items) |i| try writer.writeInt(u16, i, endian); | 141 | \\pub const s1: [{}]u16 = .{{ |
| 142 | , .{stage1.items.len}); | ||
| 143 | for (stage1.items) |entry| try writer.interface.print("{}, ", .{entry}); | ||
| 144 | |||
| 145 | try writer.interface.print( | ||
| 146 | \\ | ||
| 147 | \\}}; | ||
| 148 | \\ | ||
| 149 | \\pub const s2: [{}]u6 = .{{ | ||
| 150 | , .{stage2.items.len}); | ||
| 151 | for (stage2.items) |entry| { | ||
| 152 | try writer.interface.print("{}, ", .{entry}); | ||
| 153 | } | ||
| 141 | 154 | ||
| 142 | try writer.writeInt(u16, @intCast(stage2.items.len), endian); | 155 | try writer.interface.writeAll( |
| 143 | for (stage2.items) |i| try writer.writeInt(u8, i, endian); | 156 | \\}; |
| 157 | ); | ||
| 144 | 158 | ||
| 145 | try out_comp.flush(); | 159 | try writer.interface.flush(); |
| 146 | } | 160 | } |
diff --git a/src/Emoji.zig b/src/Emoji.zig index 75b44c2..13f675b 100644 --- a/src/Emoji.zig +++ b/src/Emoji.zig | |||
| @@ -1,17 +1,17 @@ | |||
| 1 | const std = @import("std"); | 1 | //! Emoji module |
| 2 | const builtin = @import("builtin"); | ||
| 3 | const mem = std.mem; | ||
| 4 | const Allocator = mem.Allocator; | ||
| 5 | const compress = std.compress; | ||
| 6 | const unicode = std.unicode; | ||
| 7 | |||
| 8 | const CodePoint = @import("code_point").CodePoint; | ||
| 9 | const CodePointIterator = @import("code_point").Iterator; | ||
| 10 | 2 | ||
| 11 | s1: []u16 = undefined, | 3 | const Data = struct { |
| 12 | s2: []u6 = undefined, | 4 | s1: []const u16 = undefined, |
| 5 | s2: []const u6 = undefined, | ||
| 6 | }; | ||
| 13 | 7 | ||
| 14 | const Emoji = @This(); | 8 | const emoji = emoji: { |
| 9 | const data = @import("emoji"); | ||
| 10 | break :emoji Data{ | ||
| 11 | .s1 = &data.s1, | ||
| 12 | .s2 = &data.s2, | ||
| 13 | }; | ||
| 14 | }; | ||
| 15 | 15 | ||
| 16 | // This must be an exact match of `Emoji` from `codegen/emoji.zig`. | 16 | // This must be an exact match of `Emoji` from `codegen/emoji.zig`. |
| 17 | pub const Properties = packed struct { | 17 | pub const Properties = packed struct { |
| @@ -23,110 +23,68 @@ pub const Properties = packed struct { | |||
| 23 | Extended_Pictographic: bool = false, | 23 | Extended_Pictographic: bool = false, |
| 24 | }; | 24 | }; |
| 25 | 25 | ||
| 26 | pub fn init(allocator: Allocator) Allocator.Error!Emoji { | ||
| 27 | var emoji = Emoji{}; | ||
| 28 | try emoji.setup(allocator); | ||
| 29 | return emoji; | ||
| 30 | } | ||
| 31 | |||
| 32 | pub fn setup(emoji: *Emoji, allocator: Allocator) Allocator.Error!void { | ||
| 33 | const decompressor = compress.flate.inflate.decompressor; | ||
| 34 | const in_bytes = @embedFile("emoji"); | ||
| 35 | var in_fbs = std.io.fixedBufferStream(in_bytes); | ||
| 36 | var in_decomp = decompressor(.raw, in_fbs.reader()); | ||
| 37 | var reader = in_decomp.reader(); | ||
| 38 | |||
| 39 | const endian = builtin.cpu.arch.endian(); | ||
| 40 | |||
| 41 | const s1_len: u16 = reader.readInt(u16, endian) catch unreachable; | ||
| 42 | emoji.s1 = try allocator.alloc(u16, s1_len); | ||
| 43 | errdefer allocator.free(emoji.s1); | ||
| 44 | for (0..s1_len) |i| emoji.s1[i] = reader.readInt(u16, endian) catch unreachable; | ||
| 45 | |||
| 46 | const s2_len: u16 = reader.readInt(u16, endian) catch unreachable; | ||
| 47 | emoji.s2 = try allocator.alloc(u6, s2_len); | ||
| 48 | errdefer allocator.free(emoji.s2); | ||
| 49 | for (0..s2_len) |i| emoji.s2[i] = @intCast(reader.readInt(u8, endian) catch unreachable); | ||
| 50 | } | ||
| 51 | |||
| 52 | pub fn deinit(emoji: *const Emoji, allocator: Allocator) void { | ||
| 53 | allocator.free(emoji.s1); | ||
| 54 | allocator.free(emoji.s2); | ||
| 55 | } | ||
| 56 | |||
| 57 | /// Lookup the emoji properties for a code point. | 26 | /// Lookup the emoji properties for a code point. |
| 58 | fn properties(emoji: Emoji, cp: u21) Properties { | 27 | fn properties(cp: u21) Properties { |
| 59 | return @bitCast(emoji.s2[emoji.s1[cp >> 8] + (cp & 0xff)]); | 28 | return @bitCast(emoji.s2[emoji.s1[cp >> 8] + (cp & 0xff)]); |
| 60 | } | 29 | } |
| 61 | 30 | ||
| 62 | pub fn isEmoji(emoji: Emoji, cp: u21) bool { | 31 | pub fn isEmoji(cp: u21) bool { |
| 63 | return properties(emoji, cp).Emoji; | 32 | return properties(cp).Emoji; |
| 64 | } | 33 | } |
| 65 | 34 | ||
| 66 | pub fn isEmojiPresentation(emoji: Emoji, cp: u21) bool { | 35 | pub fn isEmojiPresentation(cp: u21) bool { |
| 67 | return properties(emoji, cp).Emoji_Presentation; | 36 | return properties(cp).Emoji_Presentation; |
| 68 | } | 37 | } |
| 69 | 38 | ||
| 70 | pub fn isEmojiModifier(emoji: Emoji, cp: u21) bool { | 39 | pub fn isEmojiModifier(cp: u21) bool { |
| 71 | return properties(emoji, cp).Emoji_Modifier; | 40 | return properties(cp).Emoji_Modifier; |
| 72 | } | 41 | } |
| 73 | 42 | ||
| 74 | pub fn isEmojiModifierBase(emoji: Emoji, cp: u21) bool { | 43 | pub fn isEmojiModifierBase(cp: u21) bool { |
| 75 | return properties(emoji, cp).Emoji_Modifier_Base; | 44 | return properties(cp).Emoji_Modifier_Base; |
| 76 | } | 45 | } |
| 77 | 46 | ||
| 78 | pub fn isEmojiComponent(emoji: Emoji, cp: u21) bool { | 47 | pub fn isEmojiComponent(cp: u21) bool { |
| 79 | return properties(emoji, cp).Emoji_Component; | 48 | return properties(cp).Emoji_Component; |
| 80 | } | 49 | } |
| 81 | 50 | ||
| 82 | pub fn isExtendedPictographic(emoji: Emoji, cp: u21) bool { | 51 | pub fn isExtendedPictographic(cp: u21) bool { |
| 83 | return properties(emoji, cp).Extended_Pictographic; | 52 | return properties(cp).Extended_Pictographic; |
| 84 | } | 53 | } |
| 85 | 54 | ||
| 86 | test "isEmoji" { | 55 | test "isEmoji" { |
| 87 | const emoji = try Emoji.init(std.testing.allocator); | 56 | try std.testing.expect(isEmoji(0x1F415)); // 🐕 |
| 88 | defer emoji.deinit(std.testing.allocator); | 57 | try std.testing.expect(!isEmoji(0x3042)); // あ |
| 89 | |||
| 90 | try std.testing.expect(emoji.isEmoji(0x1F415)); // 🐕 | ||
| 91 | try std.testing.expect(!emoji.isEmoji(0x3042)); // あ | ||
| 92 | } | 58 | } |
| 93 | 59 | ||
| 94 | test "isEmojiPresentation" { | 60 | test "isEmojiPresentation" { |
| 95 | const emoji = try Emoji.init(std.testing.allocator); | 61 | try std.testing.expect(isEmojiPresentation(0x1F408)); // 🐈 |
| 96 | defer emoji.deinit(std.testing.allocator); | 62 | try std.testing.expect(!isEmojiPresentation(0x267E)); // ♾ |
| 97 | |||
| 98 | try std.testing.expect(emoji.isEmojiPresentation(0x1F408)); // 🐈 | ||
| 99 | try std.testing.expect(!emoji.isEmojiPresentation(0x267E)); // ♾ | ||
| 100 | } | 63 | } |
| 101 | 64 | ||
| 102 | test "isEmojiModifier" { | 65 | test "isEmojiModifier" { |
| 103 | const emoji = try Emoji.init(std.testing.allocator); | 66 | try std.testing.expect(isEmojiModifier(0x1F3FF)); // 🏿 |
| 104 | defer emoji.deinit(std.testing.allocator); | 67 | try std.testing.expect(!isEmojiModifier(0x1F385)); // 🎅 |
| 105 | |||
| 106 | try std.testing.expect(emoji.isEmojiModifier(0x1F3FF)); // 🏿 | ||
| 107 | try std.testing.expect(!emoji.isEmojiModifier(0x1F385)); // 🎅 | ||
| 108 | } | 68 | } |
| 109 | 69 | ||
| 110 | test "isEmojiModifierBase" { | 70 | test "isEmojiModifierBase" { |
| 111 | const emoji = try Emoji.init(std.testing.allocator); | 71 | try std.testing.expect(isEmojiModifierBase(0x1F977)); // 🥷 |
| 112 | defer emoji.deinit(std.testing.allocator); | 72 | try std.testing.expect(!isEmojiModifierBase(0x1F4F8)); // 📸 |
| 113 | |||
| 114 | try std.testing.expect(emoji.isEmojiModifierBase(0x1F977)); // 🥷 | ||
| 115 | try std.testing.expect(!emoji.isEmojiModifierBase(0x1F4F8)); // 📸 | ||
| 116 | } | 73 | } |
| 117 | 74 | ||
| 118 | test "isEmojiComponent" { | 75 | test "isEmojiComponent" { |
| 119 | const emoji = try Emoji.init(std.testing.allocator); | 76 | try std.testing.expect(isEmojiComponent(0x1F9B0)); // 🦰 |
| 120 | defer emoji.deinit(std.testing.allocator); | 77 | try std.testing.expect(!isEmojiComponent(0x1F9B5)); // 🦵 |
| 121 | |||
| 122 | try std.testing.expect(emoji.isEmojiComponent(0x1F9B0)); // 🦰 | ||
| 123 | try std.testing.expect(!emoji.isEmojiComponent(0x1F9B5)); // 🦵 | ||
| 124 | } | 78 | } |
| 125 | 79 | ||
| 126 | test "isExtendedPictographic" { | 80 | test "isExtendedPictographic" { |
| 127 | const emoji = try Emoji.init(std.testing.allocator); | 81 | try std.testing.expect(isExtendedPictographic(0x1F005)); // 🀅 |
| 128 | defer emoji.deinit(std.testing.allocator); | 82 | try std.testing.expect(!isExtendedPictographic(0x2A)); // * |
| 129 | |||
| 130 | try std.testing.expect(emoji.isExtendedPictographic(0x1F005)); // 🀅 | ||
| 131 | try std.testing.expect(!emoji.isExtendedPictographic(0x2A)); // * | ||
| 132 | } | 83 | } |
| 84 | |||
| 85 | const std = @import("std"); | ||
| 86 | const builtin = @import("builtin"); | ||
| 87 | const unicode = std.unicode; | ||
| 88 | |||
| 89 | const CodePoint = @import("code_point").CodePoint; | ||
| 90 | const CodePointIterator = @import("code_point").Iterator; | ||