From ba5d9081b479e95ffa7f3baf751beedd370cec14 Mon Sep 17 00:00:00 2001
From: Sam Atman
Date: Wed, 4 Feb 2026 18:01:36 -0500
Subject: Normalization and case folding

Both of which deserve some further attention.
---
 src/CombiningData.zig | 49 ++++++++++++++++---------------------------------
 1 file changed, 16 insertions(+), 33 deletions(-)

(limited to 'src/CombiningData.zig')

diff --git a/src/CombiningData.zig b/src/CombiningData.zig
index f58e0de..660dc14 100644
--- a/src/CombiningData.zig
+++ b/src/CombiningData.zig
@@ -1,45 +1,28 @@
 //! Combining Class Data
 
-s1: []u16 = undefined,
-s2: []u8 = undefined,
+const Data = struct {
+    s1: []const u16 = undefined,
+    s2: []const u8 = undefined,
+};
+
+const combining_data = combining_data: {
+    const data = @import("ccc");
+    break :combining_data Data{
+        .s1 = &data.s1,
+        .s2 = &data.s2,
+    };
+};
 
 const CombiningData = @This();
 
-pub fn init(allocator: mem.Allocator) !CombiningData {
-    const in_bytes = @embedFile("ccc");
-    var in_fbs = std.io.fixedBufferStream(in_bytes);
-    var reader = in_fbs.reader();
-
-    const endian = builtin.cpu.arch.endian();
-
-    var cbdata = CombiningData{};
-
-    const stage_1_len: u16 = try reader.readInt(u16, endian);
-    cbdata.s1 = try allocator.alloc(u16, stage_1_len);
-    errdefer allocator.free(cbdata.s1);
-    for (0..stage_1_len) |i| cbdata.s1[i] = try reader.readInt(u16, endian);
-
-    const stage_2_len: u16 = try reader.readInt(u16, endian);
-    cbdata.s2 = try allocator.alloc(u8, stage_2_len);
-    errdefer allocator.free(cbdata.s2);
-    _ = try reader.readAll(cbdata.s2);
-
-    return cbdata;
-}
-
-pub fn deinit(cbdata: *const CombiningData, allocator: mem.Allocator) void {
-    allocator.free(cbdata.s1);
-    allocator.free(cbdata.s2);
-}
-
 /// Returns the canonical combining class for a code point.
-pub fn ccc(cbdata: CombiningData, cp: u21) u8 {
-    return cbdata.s2[cbdata.s1[cp >> 8] + (cp & 0xff)];
+pub fn ccc(cp: u21) u8 {
+    return combining_data.s2[combining_data.s1[cp >> 8] + (cp & 0xff)];
 }
 
 /// True if `cp` is a starter code point, not a combining character.
-pub fn isStarter(cbdata: CombiningData, cp: u21) bool {
-    return cbdata.s2[cbdata.s1[cp >> 8] + (cp & 0xff)] == 0;
+pub fn isStarter(cp: u21) bool {
+    return combining_data.s2[combining_data.s1[cp >> 8] + (cp & 0xff)] == 0;
 }
 
 const std = @import("std");
-- 
cgit v1.2.3