From ba5d9081b479e95ffa7f3baf751beedd370cec14 Mon Sep 17 00:00:00 2001 From: Sam Atman Date: Wed, 4 Feb 2026 18:01:36 -0500 Subject: Normalization and case folding Both of which deserve some further attention. --- src/NormPropsData.zig | 46 +++++++++++++++------------------------------- 1 file changed, 15 insertions(+), 31 deletions(-) (limited to 'src/NormPropsData.zig') diff --git a/src/NormPropsData.zig b/src/NormPropsData.zig index 7b53542..cca3556 100644 --- a/src/NormPropsData.zig +++ b/src/NormPropsData.zig @@ -1,48 +1,32 @@ //! Normalization Properties Data -s1: []u16 = undefined, -s2: []u4 = undefined, +const Data = struct { + s1: []const u16 = undefined, + s2: []const u3 = undefined, +}; + +const norms = norm_props_data: { + const data = @import("normp"); + break :norm_props_data Data{ + .s1 = &data.s1, + .s2 = &data.s2, + }; +}; const NormProps = @This(); -pub fn init(allocator: mem.Allocator) !NormProps { - const in_bytes = @embedFile("normp"); - var in_fbs = std.io.fixedBufferStream(in_bytes); - var reader = in_fbs.reader(); - - const endian = builtin.cpu.arch.endian(); - var norms = NormProps{}; - - const stage_1_len: u16 = try reader.readInt(u16, endian); - norms.s1 = try allocator.alloc(u16, stage_1_len); - errdefer allocator.free(norms.s1); - for (0..stage_1_len) |i| norms.s1[i] = try reader.readInt(u16, endian); - - const stage_2_len: u16 = try reader.readInt(u16, endian); - norms.s2 = try allocator.alloc(u4, stage_2_len); - errdefer allocator.free(norms.s2); - for (0..stage_2_len) |i| norms.s2[i] = @intCast(try reader.readInt(u8, endian)); - - return norms; -} - -pub fn deinit(norms: *const NormProps, allocator: mem.Allocator) void { - allocator.free(norms.s1); - allocator.free(norms.s2); -} - /// Returns true if `cp` is already in NFD form. -pub fn isNfd(norms: *const NormProps, cp: u21) bool { +pub fn isNfd(cp: u21) bool { return norms.s2[norms.s1[cp >> 8] + (cp & 0xff)] & 1 == 0; } /// Returns true if `cp` is already in NFKD form. -pub fn isNfkd(norms: *const NormProps, cp: u21) bool { +pub fn isNfkd(cp: u21) bool { return norms.s2[norms.s1[cp >> 8] + (cp & 0xff)] & 2 == 0; } /// Returns true if `cp` is not allowed in any normalized form. -pub fn isFcx(norms: *const NormProps, cp: u21) bool { +pub fn isFcx(cp: u21) bool { return norms.s2[norms.s1[cp >> 8] + (cp & 0xff)] & 4 == 4; } -- cgit v1.2.3