summary | refs | log | tree | commit | diff
path: root/src/HangulData.zig
diff options
context:
space:
mode:
author: Sam Atman, 2026-02-04 18:01:36 -0500
committer: Sam Atman, 2026-02-04 18:01:36 -0500
commit: ba5d9081b479e95ffa7f3baf751beedd370cec14 (patch)
tree: c12041d8aab9f9ff68b25a2e2c9042073c3d5f61 /src/HangulData.zig
parent: Convert Words module to no-allocation (diff)
download: zg-ba5d9081b479e95ffa7f3baf751beedd370cec14.tar.gz
zg-ba5d9081b479e95ffa7f3baf751beedd370cec14.tar.xz
zg-ba5d9081b479e95ffa7f3baf751beedd370cec14.zip
Normalization and case folding
Both of which deserve some further attention.
Diffstat (limited to 'src/HangulData.zig')
-rw-r--r-- src/HangulData.zig 42
1 files changed, 13 insertions, 29 deletions
diff --git a/src/HangulData.zig b/src/HangulData.zig
index cae8b97..9c17421 100644
--- a/src/HangulData.zig
+++ b/src/HangulData.zig
@@ -9,39 +9,23 @@ pub const Syllable = enum {
9 T, 9 T,
10}; 10};
11 11
12s1: []u16 = undefined, 12const Data = struct {
13s2: []u3 = undefined, 13 s1: []const u16 = undefined,
14 14 s2: []const u3 = undefined,
15const Hangul = @This(); 15};
16
17pub fn init(allocator: mem.Allocator) !Hangul {
18 const in_bytes = @embedFile("hangul");
19 var in_fbs = std.io.fixedBufferStream(in_bytes);
20 var reader = in_fbs.reader();
21
22 const endian = builtin.cpu.arch.endian();
23 var hangul = Hangul{};
24
25 const stage_1_len: u16 = try reader.readInt(u16, endian);
26 hangul.s1 = try allocator.alloc(u16, stage_1_len);
27 errdefer allocator.free(hangul.s1);
28 for (0..stage_1_len) |i| hangul.s1[i] = try reader.readInt(u16, endian);
29
30 const stage_2_len: u16 = try reader.readInt(u16, endian);
31 hangul.s2 = try allocator.alloc(u3, stage_2_len);
32 errdefer allocator.free(hangul.s2);
33 for (0..stage_2_len) |i| hangul.s2[i] = @intCast(try reader.readInt(u8, endian));
34 16
35 return hangul; 17const hangul = hangul_data: {
36} 18 const data = @import("hangul");
19 break :hangul_data Data{
20 .s1 = &data.s1,
21 .s2 = &data.s2,
22 };
23};
37 24
38pub fn deinit(hangul: *const Hangul, allocator: mem.Allocator) void { 25const Hangul = @This();
39 allocator.free(hangul.s1);
40 allocator.free(hangul.s2);
41}
42 26
43/// Returns the Hangul syllable type for `cp`. 27/// Returns the Hangul syllable type for `cp`.
44pub fn syllable(hangul: *const Hangul, cp: u21) Syllable { 28pub fn syllable(cp: u21) Syllable {
45 return @enumFromInt(hangul.s2[hangul.s1[cp >> 8] + (cp & 0xff)]); 29 return @enumFromInt(hangul.s2[hangul.s1[cp >> 8] + (cp & 0xff)]);
46} 30}
47 31