summaryrefslogtreecommitdiff
path: root/src/CombiningData.zig
diff options
context:
space:
mode:
authorGravatar Sam Atman2026-02-04 18:01:36 -0500
committerGravatar Sam Atman2026-02-04 18:01:36 -0500
commitba5d9081b479e95ffa7f3baf751beedd370cec14 (patch)
treec12041d8aab9f9ff68b25a2e2c9042073c3d5f61 /src/CombiningData.zig
parentConvert Words module to no-allocation (diff)
downloadzg-ba5d9081b479e95ffa7f3baf751beedd370cec14.tar.gz
zg-ba5d9081b479e95ffa7f3baf751beedd370cec14.tar.xz
zg-ba5d9081b479e95ffa7f3baf751beedd370cec14.zip
Normalization and case folding
Both of which deserve some further attention.
Diffstat (limited to 'src/CombiningData.zig')
-rw-r--r--src/CombiningData.zig49
1 files changed, 16 insertions, 33 deletions
diff --git a/src/CombiningData.zig b/src/CombiningData.zig
index f58e0de..660dc14 100644
--- a/src/CombiningData.zig
+++ b/src/CombiningData.zig
@@ -1,45 +1,28 @@
1//! Combining Class Data 1//! Combining Class Data
2 2
3s1: []u16 = undefined, 3const Data = struct {
4s2: []u8 = undefined, 4 s1: []const u16 = undefined,
5 s2: []const u8 = undefined,
6};
7
8const combining_data = combining_data: {
9 const data = @import("ccc");
10 break :combining_data Data{
11 .s1 = &data.s1,
12 .s2 = &data.s2,
13 };
14};
5 15
6const CombiningData = @This(); 16const CombiningData = @This();
7 17
8pub fn init(allocator: mem.Allocator) !CombiningData {
9 const in_bytes = @embedFile("ccc");
10 var in_fbs = std.io.fixedBufferStream(in_bytes);
11 var reader = in_fbs.reader();
12
13 const endian = builtin.cpu.arch.endian();
14
15 var cbdata = CombiningData{};
16
17 const stage_1_len: u16 = try reader.readInt(u16, endian);
18 cbdata.s1 = try allocator.alloc(u16, stage_1_len);
19 errdefer allocator.free(cbdata.s1);
20 for (0..stage_1_len) |i| cbdata.s1[i] = try reader.readInt(u16, endian);
21
22 const stage_2_len: u16 = try reader.readInt(u16, endian);
23 cbdata.s2 = try allocator.alloc(u8, stage_2_len);
24 errdefer allocator.free(cbdata.s2);
25 _ = try reader.readAll(cbdata.s2);
26
27 return cbdata;
28}
29
30pub fn deinit(cbdata: *const CombiningData, allocator: mem.Allocator) void {
31 allocator.free(cbdata.s1);
32 allocator.free(cbdata.s2);
33}
34
35/// Returns the canonical combining class for a code point. 18/// Returns the canonical combining class for a code point.
36pub fn ccc(cbdata: CombiningData, cp: u21) u8 { 19pub fn ccc(cp: u21) u8 {
37 return cbdata.s2[cbdata.s1[cp >> 8] + (cp & 0xff)]; 20 return combining_data.s2[combining_data.s1[cp >> 8] + (cp & 0xff)];
38} 21}
39 22
40/// True if `cp` is a starter code point, not a combining character. 23/// True if `cp` is a starter code point, not a combining character.
41pub fn isStarter(cbdata: CombiningData, cp: u21) bool { 24pub fn isStarter(cp: u21) bool {
42 return cbdata.s2[cbdata.s1[cp >> 8] + (cp & 0xff)] == 0; 25 return combining_data.s2[combining_data.s1[cp >> 8] + (cp & 0xff)] == 0;
43} 26}
44 27
45const std = @import("std"); 28const std = @import("std");