From ba5d9081b479e95ffa7f3baf751beedd370cec14 Mon Sep 17 00:00:00 2001
From: Sam Atman
Date: Wed, 4 Feb 2026 18:01:36 -0500
Subject: Normalization and case folding

Both of which deserve some further attention.
---
 src/CompatData.zig | 64 +++++++++++-------------------------------------------
 1 file changed, 13 insertions(+), 51 deletions(-)

(limited to 'src/CompatData.zig')

diff --git a/src/CompatData.zig b/src/CompatData.zig
index 40ecd12..68d47f2 100644
--- a/src/CompatData.zig
+++ b/src/CompatData.zig
@@ -1,57 +1,19 @@
 //! Compatibility Data
 
-nfkd: [][]u21 = undefined,
-cps: []u21 = undefined,
-
-const CompatData = @This();
-
-pub fn init(allocator: mem.Allocator) !CompatData {
-    const in_bytes = @embedFile("compat");
-    var in_fbs = std.io.fixedBufferStream(in_bytes);
-    var reader = in_fbs.reader();
-
-    const endian = builtin.cpu.arch.endian();
-    var cpdata = CompatData{
-        .nfkd = try allocator.alloc([]u21, 0x110000),
+const Data = struct {
+    s1: []const u16 = undefined,
+    s2: []const []const u21 = undefined,
+};
+
+const compat_data = compat_data: {
+    const data = @import("compat");
+    break :compat_data Data{
+        .s1 = &data.s1,
+        .s2 = &data.s2,
     };
-    {
-        errdefer allocator.free(cpdata.nfkd);
-        cpdata.cps = try allocator.alloc(u21, magic.compat_size);
-    }
-    errdefer cpdata.deinit(allocator);
-
-    @memset(cpdata.nfkd, &.{});
-
-    var total_len: usize = 0;
-
-    while (true) {
-        const len: u8 = try reader.readInt(u8, endian);
-        if (len == 0) break;
-        const cp = try reader.readInt(u24, endian);
-        const nk_s = cpdata.cps[total_len..][0 .. len - 1];
-        for (0..len - 1) |i| {
-            nk_s[i] = @intCast(try reader.readInt(u24, endian));
-        }
-        cpdata.nfkd[cp] = nk_s;
-        total_len += len - 1;
-    }
-
-    if (comptime magic.print) std.debug.print("CompatData magic number: {d}", .{total_len});
-
-    return cpdata;
-}
-
-pub fn deinit(cpdata: *const CompatData, allocator: mem.Allocator) void {
-    allocator.free(cpdata.cps);
-    allocator.free(cpdata.nfkd);
-}
+};
 
 /// Returns compatibility decomposition for `cp`.
-pub fn toNfkd(cpdata: *const CompatData, cp: u21) []u21 {
-    return cpdata.nfkd[cp];
+pub fn toNfkd(cp: u21) []const u21 {
+    return compat_data.s2[compat_data.s1[cp >> 8] + (cp & 0xff)];
 }
-
-const std = @import("std");
-const builtin = @import("builtin");
-const mem = std.mem;
-const magic = @import("magic");
-- 
cgit v1.2.3