summary | refs | log | tree | commit | diff
path: root/src/CompatData.zig
diff options
context:
space:
mode:
author: Sam Atman, 2026-02-04 18:01:36 -0500
committer: Sam Atman, 2026-02-04 18:01:36 -0500
commit: ba5d9081b479e95ffa7f3baf751beedd370cec14 (patch)
tree: c12041d8aab9f9ff68b25a2e2c9042073c3d5f61 /src/CompatData.zig
parent: Convert Words module to no-allocation (diff)
download: zg-ba5d9081b479e95ffa7f3baf751beedd370cec14.tar.gz
zg-ba5d9081b479e95ffa7f3baf751beedd370cec14.tar.xz
zg-ba5d9081b479e95ffa7f3baf751beedd370cec14.zip
Normalization and case folding
Both of which deserve some further attention.
Diffstat (limited to 'src/CompatData.zig')
-rw-r--r-- src/CompatData.zig 64
1 file changed, 13 insertions, 51 deletions
diff --git a/src/CompatData.zig b/src/CompatData.zig
index 40ecd12..68d47f2 100644
--- a/src/CompatData.zig
+++ b/src/CompatData.zig
@@ -1,57 +1,19 @@
1//! Compatibility Data 1//! Compatibility Data
2 2
3nfkd: [][]u21 = undefined, 3const Data = struct {
4cps: []u21 = undefined, 4 s1: []const u16 = undefined,
5 5 s2: []const []const u21 = undefined,
6const CompatData = @This(); 6};
7 7
8pub fn init(allocator: mem.Allocator) !CompatData { 8const compat_data = compat_data: {
9 const in_bytes = @embedFile("compat"); 9 const data = @import("compat");
10 var in_fbs = std.io.fixedBufferStream(in_bytes); 10 break :compat_data Data{
11 var reader = in_fbs.reader(); 11 .s1 = &data.s1,
12 12 .s2 = &data.s2,
13 const endian = builtin.cpu.arch.endian();
14 var cpdata = CompatData{
15 .nfkd = try allocator.alloc([]u21, 0x110000),
16 }; 13 };
17 { 14};
18 errdefer allocator.free(cpdata.nfkd);
19 cpdata.cps = try allocator.alloc(u21, magic.compat_size);
20 }
21 errdefer cpdata.deinit(allocator);
22
23 @memset(cpdata.nfkd, &.{});
24
25 var total_len: usize = 0;
26
27 while (true) {
28 const len: u8 = try reader.readInt(u8, endian);
29 if (len == 0) break;
30 const cp = try reader.readInt(u24, endian);
31 const nk_s = cpdata.cps[total_len..][0 .. len - 1];
32 for (0..len - 1) |i| {
33 nk_s[i] = @intCast(try reader.readInt(u24, endian));
34 }
35 cpdata.nfkd[cp] = nk_s;
36 total_len += len - 1;
37 }
38
39 if (comptime magic.print) std.debug.print("CompatData magic number: {d}", .{total_len});
40
41 return cpdata;
42}
43
44pub fn deinit(cpdata: *const CompatData, allocator: mem.Allocator) void {
45 allocator.free(cpdata.cps);
46 allocator.free(cpdata.nfkd);
47}
48 15
49/// Returns compatibility decomposition for `cp`. 16/// Returns compatibility decomposition for `cp`.
50pub fn toNfkd(cpdata: *const CompatData, cp: u21) []u21 { 17pub fn toNfkd(cp: u21) []const u21 {
51 return cpdata.nfkd[cp]; 18 return compat_data.s2[compat_data.s1[cp >> 8] + (cp & 0xff)];
52} 19}
53
54const std = @import("std");
55const builtin = @import("builtin");
56const mem = std.mem;
57const magic = @import("magic");