summaryrefslogtreecommitdiff
path: root/src/GraphemeData.zig
diff options
context:
space:
mode:
Diffstat (limited to 'src/GraphemeData.zig')
-rw-r--r--src/GraphemeData.zig86
1 files changed, 86 insertions, 0 deletions
diff --git a/src/GraphemeData.zig b/src/GraphemeData.zig
new file mode 100644
index 0000000..e418dea
--- /dev/null
+++ b/src/GraphemeData.zig
@@ -0,0 +1,86 @@
1const std = @import("std");
2const builtin = @import("builtin");
3const compress = std.compress;
4const mem = std.mem;
5
/// Indic syllable type.
///
/// The integer value of each member is significant: `indic` turns a 3-bit
/// field of the packed lookup byte into this enum with `@enumFromInt`, so
/// members must keep these exact values.
pub const Indic = enum(u2) {
    none = 0,

    Consonant = 1,
    Extend = 2,
    Linker = 3,
};
14
/// Grapheme break property.
///
/// The integer value of each member is significant: `gbp` turns the high
/// nibble of the packed lookup byte into this enum with `@enumFromInt`, so
/// members must keep these exact values.
pub const Gbp = enum(u4) {
    none = 0,
    Control = 1,
    CR = 2,
    Extend = 3,
    L = 4,
    LF = 5,
    LV = 6,
    LVT = 7,
    Prepend = 8,
    Regional_Indicator = 9,
    SpacingMark = 10,
    T = 11,
    V = 12,
    ZWJ = 13,
};
32
const Self = @This();

/// Allocator used by `init` to create the three tables and by `deinit` to
/// free them.
allocator: mem.Allocator,
/// First lookup stage, indexed by `cp >> 8`.
s1: []u16 = undefined,
/// Second lookup stage, indexed by `s1[cp >> 8] + (cp & 0xff)`.
s2: []u16 = undefined,
/// Third lookup stage, indexed by a value taken from `s2`. Each byte packs
/// the `Gbp` value in bits 7..4, the `Indic` value in bits 3..1 and the
/// emoji flag in bit 0.
s3: []u8 = undefined,
39
/// Decompress the embedded `gbp` data and load the three lookup tables.
///
/// The decompressed stream is read in this order: a `u16` count followed by
/// that many `u16` values for `s1`, the same layout for `s2`, then a `u16`
/// count followed by that many bytes for `s3`.
///
/// On success the returned value owns the three slices; call `deinit` to
/// release them. On failure every table allocated so far is freed before the
/// error is returned. A stream that ends before `s3` is completely filled
/// yields `error.EndOfStream`.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.deflate.decompressor;
    const in_bytes = @embedFile("gbp");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = try decompressor(allocator, in_fbs.reader(), null);
    defer in_decomp.deinit();
    const reader = in_decomp.reader();

    // NOTE(review): integers are read in the target CPU's byte order; this
    // presumably matches how the `gbp` file was generated -- confirm for
    // cross-compiled builds.
    const endian = builtin.cpu.arch.endian();

    const s1_len = try reader.readInt(u16, endian);
    const s1 = try allocator.alloc(u16, s1_len);
    errdefer allocator.free(s1);
    for (s1) |*entry| entry.* = try reader.readInt(u16, endian);

    const s2_len = try reader.readInt(u16, endian);
    const s2 = try allocator.alloc(u16, s2_len);
    errdefer allocator.free(s2);
    for (s2) |*entry| entry.* = try reader.readInt(u16, endian);

    const s3_len = try reader.readInt(u16, endian);
    const s3 = try allocator.alloc(u8, s3_len);
    errdefer allocator.free(s3);
    // `readAll` reports a short read through its return value rather than an
    // error; treat truncated data as a failure instead of leaving part of
    // `s3` uninitialized.
    const bytes_read = try reader.readAll(s3);
    if (bytes_read != s3.len) return error.EndOfStream;

    return Self{
        .allocator = allocator,
        .s1 = s1,
        .s2 = s2,
        .s3 = s3,
    };
}
66
/// Free the three lookup tables through the stored allocator.
/// The slices must not be used after this call.
pub fn deinit(self: *Self) void {
    const owner = self.allocator;
    owner.free(self.s1);
    owner.free(self.s2);
    owner.free(self.s3);
}
72
/// Look up the grapheme break property for a code point.
///
/// Walks the three table stages (`s1` by `cp >> 8`, `s2` by that entry plus
/// the low byte of `cp`, `s3` by the `s2` entry) and decodes the high nibble
/// of the resulting byte as a `Gbp`.
pub inline fn gbp(self: Self, cp: u21) Gbp {
    const block_start = self.s1[cp >> 8];
    const props_index = self.s2[block_start + (cp & 0xff)];
    const props = self.s3[props_index];
    return @enumFromInt(props >> 4);
}
77
/// Look up the Indic syllable type for a code point.
///
/// Walks the three table stages (`s1` by `cp >> 8`, `s2` by that entry plus
/// the low byte of `cp`, `s3` by the `s2` entry) and decodes bits 3..1 of the
/// resulting byte as an `Indic`.
pub inline fn indic(self: Self, cp: u21) Indic {
    const block_start = self.s1[cp >> 8];
    const props_index = self.s2[block_start + (cp & 0xff)];
    const props = self.s3[props_index];
    return @enumFromInt((props >> 1) & 0x7);
}
82
/// Report whether the emoji flag is set for a code point.
///
/// Walks the three table stages (`s1` by `cp >> 8`, `s2` by that entry plus
/// the low byte of `cp`, `s3` by the `s2` entry) and tests bit 0 of the
/// resulting byte.
pub inline fn isEmoji(self: Self, cp: u21) bool {
    const block_start = self.s1[cp >> 8];
    const props_index = self.s2[block_start + (cp & 0xff)];
    const props = self.s3[props_index];
    return (props & 1) == 1;
}