From e485c04ff794a30d21c4a77cccda52b581e95881 Mon Sep 17 00:00:00 2001 From: Sam Atman Date: Thu, 5 Feb 2026 08:01:48 -0500 Subject: De-allocate Emoji module --- build.zig | 13 +++--- codegen/emoji.zig | 56 ++++++++++++++--------- src/Emoji.zig | 130 ++++++++++++++++++------------------------------------ 3 files changed, 86 insertions(+), 113 deletions(-) diff --git a/build.zig b/build.zig index 6ba73d9..ee2a6ec 100644 --- a/build.zig +++ b/build.zig @@ -57,12 +57,15 @@ pub fn build(b: *std.Build) void { // Emoji const emoji_gen_exe = b.addExecutable(.{ .name = "emoji", - .root_source_file = b.path("codegen/emoji.zig"), - .target = b.graph.host, - .optimize = .Debug, + .root_module = b.createModule(.{ + .root_source_file = b.path("codegen/emoji.zig"), + .target = b.graph.host, + .optimize = .Debug, + }), }); + emoji_gen_exe.root_module.addAnonymousImport("emoji-data.txt", .{ .root_source_file = b.path("data/unicode/emoji/emoji-data.txt") }); const run_emoji_gen_exe = b.addRunArtifact(emoji_gen_exe); - const emoji_gen_out = run_emoji_gen_exe.addOutputFileArg("emoji.bin.z"); + const emoji_gen_out = run_emoji_gen_exe.addOutputFileArg("emoji.zig"); const wbp_gen_exe = b.addExecutable(.{ .name = "wbp", @@ -283,8 +286,6 @@ pub fn build(b: *std.Build) void { const emoji_t = b.addTest(.{ .name = "Emoji", .root_module = emoji, - .target = target, - .optimize = optimize, }); const emoji_tr = b.addRunArtifact(emoji_t); diff --git a/codegen/emoji.zig b/codegen/emoji.zig index 0a4dbe6..c44c483 100644 --- a/codegen/emoji.zig +++ b/codegen/emoji.zig @@ -44,21 +44,17 @@ pub fn main() !void { var emoji_map = std.AutoHashMap(u21, Emoji).init(allocator); defer emoji_map.deinit(); - var line_buf: [4096]u8 = undefined; - // Process Emoji - var in_file = try std.fs.cwd().openFile("data/unicode/emoji/emoji-data.txt", .{}); - defer in_file.close(); - var in_buf = std.io.bufferedReader(in_file.reader()); - const in_reader = in_buf.reader(); - while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { - if (line.len == 0 or line[0] == '#') continue; + var @"emo-reader" = std.io.Reader.fixed(@embedFile("emoji-data.txt")); + var count: usize = 0; // XXX: remove + while (@"emo-reader".takeDelimiterInclusive('\n')) |line| { + count += 1; + if (line.len <= 1 or line[0] == '#') continue; const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; var field_iter = std.mem.tokenizeAny(u8, no_comment, "; "); var current_code: [2]u21 = undefined; - var i: usize = 0; while (field_iter.next()) |field| : (i += 1) { switch (i) { @@ -91,15 +87,20 @@ pub fn main() !void { else => {}, } } + } else |err| switch (err) { + error.EndOfStream => {}, + else => { + return err; + }, } var blocks_map = BlockMap.init(allocator); defer blocks_map.deinit(); - var stage1 = std.ArrayList(u16).init(allocator); + var stage1 = std.array_list.Managed(u16).init(allocator); defer stage1.deinit(); - var stage2 = std.ArrayList(u6).init(allocator); + var stage2 = std.array_list.Managed(u6).init(allocator); defer stage2.deinit(); var block: Block = [_]u6{0} ** block_size; @@ -129,18 +130,31 @@ pub fn main() !void { _ = args_iter.skip(); const output_path = args_iter.next() orelse @panic("No output file arg!"); - const compressor = std.compress.flate.deflate.compressor; + var write_buf: [4096]u8 = undefined; var out_file = try std.fs.cwd().createFile(output_path, .{}); defer out_file.close(); - var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); - const writer = out_comp.writer(); - - const endian = builtin.cpu.arch.endian(); - try writer.writeInt(u16, @intCast(stage1.items.len), endian); - for (stage1.items) |i| try writer.writeInt(u16, i, endian); + var writer = out_file.writer(&write_buf); + + try writer.interface.print( + \\//! This file is auto-generated. Do not edit. + \\ + \\pub const s1: [{}]u16 = .{{ + , .{stage1.items.len}); + for (stage1.items) |entry| try writer.interface.print("{}, ", .{entry}); + + try writer.interface.print( + \\ + \\}}; + \\ + \\pub const s2: [{}]u6 = .{{ + , .{stage2.items.len}); + for (stage2.items) |entry| { + try writer.interface.print("{}, ", .{entry}); + } - try writer.writeInt(u16, @intCast(stage2.items.len), endian); - for (stage2.items) |i| try writer.writeInt(u8, i, endian); + try writer.interface.writeAll( + \\}; + ); - try out_comp.flush(); + try writer.interface.flush(); } diff --git a/src/Emoji.zig b/src/Emoji.zig index 75b44c2..13f675b 100644 --- a/src/Emoji.zig +++ b/src/Emoji.zig @@ -1,17 +1,17 @@ -const std = @import("std"); -const builtin = @import("builtin"); -const mem = std.mem; -const Allocator = mem.Allocator; -const compress = std.compress; -const unicode = std.unicode; - -const CodePoint = @import("code_point").CodePoint; -const CodePointIterator = @import("code_point").Iterator; +//! Emoji module -s1: []u16 = undefined, -s2: []u6 = undefined, +const Data = struct { + s1: []const u16 = undefined, + s2: []const u6 = undefined, +}; -const Emoji = @This(); +const emoji = emoji: { + const data = @import("emoji"); + break :emoji Data{ + .s1 = &data.s1, + .s2 = &data.s2, + }; +}; // This must be an exact match of `Emoji` from `codegen/emoji.zig`. pub const Properties = packed struct { @@ -23,110 +23,68 @@ pub const Properties = packed struct { Extended_Pictographic: bool = false, }; -pub fn init(allocator: Allocator) Allocator.Error!Emoji { - var emoji = Emoji{}; - try emoji.setup(allocator); - return emoji; -} - -pub fn setup(emoji: *Emoji, allocator: Allocator) Allocator.Error!void { - const decompressor = compress.flate.inflate.decompressor; - const in_bytes = @embedFile("emoji"); - var in_fbs = std.io.fixedBufferStream(in_bytes); - var in_decomp = decompressor(.raw, in_fbs.reader()); - var reader = in_decomp.reader(); - - const endian = builtin.cpu.arch.endian(); - - const s1_len: u16 = reader.readInt(u16, endian) catch unreachable; - emoji.s1 = try allocator.alloc(u16, s1_len); - errdefer allocator.free(emoji.s1); - for (0..s1_len) |i| emoji.s1[i] = reader.readInt(u16, endian) catch unreachable; - - const s2_len: u16 = reader.readInt(u16, endian) catch unreachable; - emoji.s2 = try allocator.alloc(u6, s2_len); - errdefer allocator.free(emoji.s2); - for (0..s2_len) |i| emoji.s2[i] = @intCast(reader.readInt(u8, endian) catch unreachable); -} - -pub fn deinit(emoji: *const Emoji, allocator: Allocator) void { - allocator.free(emoji.s1); - allocator.free(emoji.s2); -} - /// Lookup the emoji properties for a code point. -fn properties(emoji: Emoji, cp: u21) Properties { +fn properties(cp: u21) Properties { return @bitCast(emoji.s2[emoji.s1[cp >> 8] + (cp & 0xff)]); } -pub fn isEmoji(emoji: Emoji, cp: u21) bool { - return properties(emoji, cp).Emoji; +pub fn isEmoji(cp: u21) bool { + return properties(cp).Emoji; } -pub fn isEmojiPresentation(emoji: Emoji, cp: u21) bool { - return properties(emoji, cp).Emoji_Presentation; +pub fn isEmojiPresentation(cp: u21) bool { + return properties(cp).Emoji_Presentation; } -pub fn isEmojiModifier(emoji: Emoji, cp: u21) bool { - return properties(emoji, cp).Emoji_Modifier; +pub fn isEmojiModifier(cp: u21) bool { + return properties(cp).Emoji_Modifier; } -pub fn isEmojiModifierBase(emoji: Emoji, cp: u21) bool { - return properties(emoji, cp).Emoji_Modifier_Base; +pub fn isEmojiModifierBase(cp: u21) bool { + return properties(cp).Emoji_Modifier_Base; } -pub fn isEmojiComponent(emoji: Emoji, cp: u21) bool { - return properties(emoji, cp).Emoji_Component; +pub fn isEmojiComponent(cp: u21) bool { + return properties(cp).Emoji_Component; } -pub fn isExtendedPictographic(emoji: Emoji, cp: u21) bool { - return properties(emoji, cp).Extended_Pictographic; +pub fn isExtendedPictographic(cp: u21) bool { + return properties(cp).Extended_Pictographic; } test "isEmoji" { - const emoji = try Emoji.init(std.testing.allocator); - defer emoji.deinit(std.testing.allocator); - - try std.testing.expect(emoji.isEmoji(0x1F415)); // 🐕 - try std.testing.expect(!emoji.isEmoji(0x3042)); // あ + try std.testing.expect(isEmoji(0x1F415)); // 🐕 + try std.testing.expect(!isEmoji(0x3042)); // あ } test "isEmojiPresentation" { - const emoji = try Emoji.init(std.testing.allocator); - defer emoji.deinit(std.testing.allocator); - - try std.testing.expect(emoji.isEmojiPresentation(0x1F408)); // 🐈 - try std.testing.expect(!emoji.isEmojiPresentation(0x267E)); // ♾ + try std.testing.expect(isEmojiPresentation(0x1F408)); // 🐈 + try std.testing.expect(!isEmojiPresentation(0x267E)); // ♾ } test "isEmojiModifier" { - const emoji = try Emoji.init(std.testing.allocator); - defer emoji.deinit(std.testing.allocator); - - try std.testing.expect(emoji.isEmojiModifier(0x1F3FF)); // 🏿 - try std.testing.expect(!emoji.isEmojiModifier(0x1F385)); // 🎅 + try std.testing.expect(isEmojiModifier(0x1F3FF)); // + try std.testing.expect(!isEmojiModifier(0x1F385)); // 🎅 } test "isEmojiModifierBase" { - const emoji = try Emoji.init(std.testing.allocator); - defer emoji.deinit(std.testing.allocator); - - try std.testing.expect(emoji.isEmojiModifierBase(0x1F977)); // 🥷 - try std.testing.expect(!emoji.isEmojiModifierBase(0x1F4F8)); // 📸 + try std.testing.expect(isEmojiModifierBase(0x1F977)); // 🥷 + try std.testing.expect(!isEmojiModifierBase(0x1F4F8)); // 📸 } test "isEmojiComponent" { - const emoji = try Emoji.init(std.testing.allocator); - defer emoji.deinit(std.testing.allocator); - - try std.testing.expect(emoji.isEmojiComponent(0x1F9B0)); // 🦰 - try std.testing.expect(!emoji.isEmojiComponent(0x1F9B5)); // 🦵 + try std.testing.expect(isEmojiComponent(0x1F9B0)); // 🦰 + try std.testing.expect(!isEmojiComponent(0x1F9B5)); // 🦵 } test "isExtendedPictographic" { - const emoji = try Emoji.init(std.testing.allocator); - defer emoji.deinit(std.testing.allocator); - - try std.testing.expect(emoji.isExtendedPictographic(0x1F005)); // 🀅 - try std.testing.expect(!emoji.isExtendedPictographic(0x2A)); // * + try std.testing.expect(isExtendedPictographic(0x1F005)); // 🀅 + try std.testing.expect(!isExtendedPictographic(0x2A)); // * } + +const std = @import("std"); +const builtin = @import("builtin"); +const unicode = std.unicode; + +const CodePoint = @import("code_point").CodePoint; +const CodePointIterator = @import("code_point").Iterator; -- cgit v1.2.3