From ba5d9081b479e95ffa7f3baf751beedd370cec14 Mon Sep 17 00:00:00 2001
From: Sam Atman
Date: Wed, 4 Feb 2026 18:01:36 -0500
Subject: Normalization and case folding

Both of which deserve some further attention.
---
 src/HangulData.zig | 42 +++++++++++++-----------------------------
 1 file changed, 13 insertions(+), 29 deletions(-)

(limited to 'src/HangulData.zig')

diff --git a/src/HangulData.zig b/src/HangulData.zig
index cae8b97..9c17421 100644
--- a/src/HangulData.zig
+++ b/src/HangulData.zig
@@ -9,39 +9,23 @@ pub const Syllable = enum {
     T,
 };
 
-s1: []u16 = undefined,
-s2: []u3 = undefined,
-
-const Hangul = @This();
-
-pub fn init(allocator: mem.Allocator) !Hangul {
-    const in_bytes = @embedFile("hangul");
-    var in_fbs = std.io.fixedBufferStream(in_bytes);
-    var reader = in_fbs.reader();
-
-    const endian = builtin.cpu.arch.endian();
-    var hangul = Hangul{};
-
-    const stage_1_len: u16 = try reader.readInt(u16, endian);
-    hangul.s1 = try allocator.alloc(u16, stage_1_len);
-    errdefer allocator.free(hangul.s1);
-    for (0..stage_1_len) |i| hangul.s1[i] = try reader.readInt(u16, endian);
-
-    const stage_2_len: u16 = try reader.readInt(u16, endian);
-    hangul.s2 = try allocator.alloc(u3, stage_2_len);
-    errdefer allocator.free(hangul.s2);
-    for (0..stage_2_len) |i| hangul.s2[i] = @intCast(try reader.readInt(u8, endian));
+const Data = struct {
+    s1: []const u16 = undefined,
+    s2: []const u3 = undefined,
+};
 
-    return hangul;
-}
+const hangul = hangul_data: {
+    const data = @import("hangul");
+    break :hangul_data Data{
+        .s1 = &data.s1,
+        .s2 = &data.s2,
+    };
+};
 
-pub fn deinit(hangul: *const Hangul, allocator: mem.Allocator) void {
-    allocator.free(hangul.s1);
-    allocator.free(hangul.s2);
-}
+const Hangul = @This();
 
 /// Returns the Hangul syllable type for `cp`.
-pub fn syllable(hangul: *const Hangul, cp: u21) Syllable {
+pub fn syllable(cp: u21) Syllable {
     return @enumFromInt(hangul.s2[hangul.s1[cp >> 8] + (cp & 0xff)]);
 }
 
--
cgit v1.2.3