summaryrefslogtreecommitdiff
path: root/src/CombiningClassData.zig
diff options
context:
space:
mode:
authorGravatar Jose Colon Rodriguez2024-02-26 12:24:42 -0400
committerGravatar Jose Colon Rodriguez2024-02-26 12:24:42 -0400
commit836a4b6e63ac4bd7beb406cb20edf23f0bd342a9 (patch)
tree5f806a29594a9cb227aaa4d131209e10ff25aeee /src/CombiningClassData.zig
parentReplaced ccc_map with table. 20ms faster (diff)
downloadzg-836a4b6e63ac4bd7beb406cb20edf23f0bd342a9.tar.gz
zg-836a4b6e63ac4bd7beb406cb20edf23f0bd342a9.tar.xz
zg-836a4b6e63ac4bd7beb406cb20edf23f0bd342a9.zip
Using separate data struct model.
Diffstat (limited to 'src/CombiningClassData.zig')
-rw-r--r--src/CombiningClassData.zig48
1 files changed, 48 insertions, 0 deletions
diff --git a/src/CombiningClassData.zig b/src/CombiningClassData.zig
new file mode 100644
index 0000000..95c947d
--- /dev/null
+++ b/src/CombiningClassData.zig
@@ -0,0 +1,48 @@
1const std = @import("std");
2const builtin = @import("builtin");
3const compress = std.compress;
4const mem = std.mem;
5
/// Allocator that `init` uses to allocate `s1` and `s2` and that `deinit` uses to free them.
6allocator: mem.Allocator,
/// First-stage table, indexed by `cp >> 8`; the entry is added to `cp & 0xff` to index `s2`.
7s1: []u16 = undefined,
/// Second-stage table; `ccc` returns its entries as canonical combining class values.
8s2: []u8 = undefined,
9
10const Self = @This();
11
/// Decompresses the embedded `ccc` data and loads the two lookup tables.
///
/// The stream layout read here is: a `u16` stage-1 length, that many `u16`
/// stage-1 entries, a `u16` stage-2 length, then that many stage-2 bytes.
/// Integers are read in the target CPU's native byte order.
///
/// Both tables are allocated with `allocator`; release them with `deinit`.
/// On any error nothing allocated here is leaked.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.deflate.decompressor;
    const in_bytes = @embedFile("ccc");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = try decompressor(allocator, in_fbs.reader(), null);
    defer in_decomp.deinit();
    const reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();

    const stage_1_len: u16 = try reader.readInt(u16, endian);
    const s1 = try allocator.alloc(u16, stage_1_len);
    // Free stage 1 if any of the remaining reads or allocations fail.
    errdefer allocator.free(s1);
    for (s1) |*entry| entry.* = try reader.readInt(u16, endian);

    const stage_2_len: u16 = try reader.readInt(u16, endian);
    const s2 = try allocator.alloc(u8, stage_2_len);
    errdefer allocator.free(s2);
    // Fail with error.EndOfStream on a short read instead of silently
    // leaving the tail of the table uninitialised.
    try reader.readNoEof(s2);

    return .{ .allocator = allocator, .s1 = s1, .s2 = s2 };
}
34
/// Frees the `s1` and `s2` tables with `self.allocator`.
pub fn deinit(self: *Self) void {
    const owner = self.allocator;
    owner.free(self.s1);
    owner.free(self.s2);
}
39
/// Looks up the canonical combining class of `cp` in the two-stage table:
/// `s1` is indexed by `cp >> 8`, and that entry plus the low 8 bits of `cp`
/// indexes `s2`.
pub inline fn ccc(self: Self, cp: u21) u8 {
    const block_start = self.s1[cp >> 8];
    const offset_in_block = cp & 0xff;
    return self.s2[block_start + offset_in_block];
}
44
/// True if `cp` is a starter code point, i.e. its canonical combining class
/// as returned by `ccc` is 0; false for combining characters.
pub inline fn isStarter(self: Self, cp: u21) bool {
    // Delegate to `ccc` so the table lookup is defined in one place only.
    return self.ccc(cp) == 0;
}