summaryrefslogtreecommitdiff
path: root/src/GraphemeData.zig
diff options
context:
space:
mode:
authorGravatar Sam Atman2025-04-30 12:02:17 -0400
committerGravatar Sam Atman2025-04-30 12:02:17 -0400
commit7a212f5ec5aabf016d17d3ed28649e7982b810ef (patch)
treec6b06b0a0afb0ed2ba18f147d9ee200e5eee09a1 /src/GraphemeData.zig
parentFactor out 'Data' for grapheme and DisplayWidth (diff)
downloadzg-7a212f5ec5aabf016d17d3ed28649e7982b810ef.tar.gz
zg-7a212f5ec5aabf016d17d3ed28649e7982b810ef.tar.xz
zg-7a212f5ec5aabf016d17d3ed28649e7982b810ef.zip
grapheme now Graphemes, Data files gone
Diffstat (limited to 'src/GraphemeData.zig')
-rw-r--r--src/GraphemeData.zig87
1 files changed, 0 insertions, 87 deletions
diff --git a/src/GraphemeData.zig b/src/GraphemeData.zig
deleted file mode 100644
index df025cb..0000000
--- a/src/GraphemeData.zig
+++ /dev/null
@@ -1,87 +0,0 @@
1const std = @import("std");
2const builtin = @import("builtin");
3const compress = std.compress;
4const mem = std.mem;
5
/// Indic syllable type of a code point.
///
/// Member order is significant: `indic()` rebuilds a value from a 3-bit
/// integer with `@enumFromInt`, so the declaration order below fixes the
/// numeric encoding (none = 0, Consonant = 1, Extend = 2, Linker = 3).
pub const Indic = enum {
    /// No Indic syllable type.
    none,
    /// Tag value 1.
    Consonant,
    /// Tag value 2.
    Extend,
    /// Tag value 3.
    Linker,
};
14
/// Grapheme break property of a code point.
///
/// Member order is significant: `gbp()` rebuilds a value from a 4-bit
/// integer with `@enumFromInt`, so the declaration order below fixes the
/// numeric encoding (none = 0 through ZWJ = 13). Do not reorder or insert
/// members without regenerating the data the tables are loaded from.
///
/// NOTE(review): the member names appear to mirror the
/// Grapheme_Cluster_Break values of Unicode UAX #29 -- confirm against the
/// table generator.
pub const Gbp = enum {
    /// Tag value 0; no grapheme break property recorded.
    none,

    Control, // 1
    CR, // 2
    Extend, // 3
    L, // 4
    LF, // 5
    LV, // 6
    LVT, // 7
    Prepend, // 8
    Regional_Indicator, // 9
    SpacingMark, // 10
    T, // 11
    V, // 12
    ZWJ, // 13
};
32
// Three-stage lookup tables, filled by `init` and released by `deinit`.
// All three default to `undefined` and must not be read before `init`.

/// Stage 1: indexed by `cp >> 8`; each entry is an offset into `s2`.
s1: []u16 = undefined,
/// Stage 2: indexed by `s1[cp >> 8] + (cp & 0xff)`; each entry is an index
/// into `s3`.
s2: []u16 = undefined,
/// Stage 3: one packed byte per entry. The lookup functions read bits 4-7
/// as the `Gbp` tag, bits 1-3 as the `Indic` tag, and bit 0 as the emoji
/// flag.
s3: []u8 = undefined,

/// This file is itself the struct type; `Self` names it.
const Self = @This();
38
/// Build the lookup tables by inflating the embedded `gbp` blob.
///
/// The blob is read as a raw deflate stream containing, in order:
///   1. a `u16` count followed by that many `u16` entries for `s1`,
///   2. a `u16` count followed by that many `u16` entries for `s2`,
///   3. a `u16` count followed by that many bytes for `s3`.
/// Integers are decoded in the target CPU's native byte order.
///
/// Returns `error.OutOfMemory` when a table cannot be allocated; any table
/// allocated before the failure is freed first. Stream read errors are
/// asserted impossible with `catch unreachable` (presumably because the
/// input is compiled into the binary -- confirm with the build step).
///
/// On success the caller owns the three tables and must release them with
/// `deinit`, passing the same allocator.
pub inline fn init(allocator: mem.Allocator) mem.Allocator.Error!Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("gbp");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();

    var self = Self{};

    const s1_len: u16 = reader.readInt(u16, endian) catch unreachable;
    self.s1 = try allocator.alloc(u16, s1_len);
    errdefer allocator.free(self.s1);
    for (self.s1) |*entry| entry.* = reader.readInt(u16, endian) catch unreachable;

    const s2_len: u16 = reader.readInt(u16, endian) catch unreachable;
    self.s2 = try allocator.alloc(u16, s2_len);
    errdefer allocator.free(self.s2);
    for (self.s2) |*entry| entry.* = reader.readInt(u16, endian) catch unreachable;

    const s3_len: u16 = reader.readInt(u16, endian) catch unreachable;
    self.s3 = try allocator.alloc(u8, s3_len);
    // No errdefer for s3: nothing after this allocation can return an error.

    // `readAll` reports a short read through its return value rather than an
    // error. Check it, so a truncated blob cannot leave the tail of `s3`
    // uninitialized and silently feed garbage to the lookup functions.
    const s3_filled = reader.readAll(self.s3) catch unreachable;
    std.debug.assert(s3_filled == self.s3.len);

    return self;
}
67
/// Release the three lookup tables.
///
/// `allocator` must be the allocator that was passed to `init`. The tables
/// must not be used after this call.
pub inline fn deinit(self: *const Self, allocator: mem.Allocator) void {
    // The tables have different element types, so walk them as a tuple;
    // they are freed in the order s1, s2, s3.
    inline for (.{ self.s1, self.s2, self.s3 }) |table| {
        allocator.free(table);
    }
}
73
/// Return the grapheme break property of code point `cp`.
///
/// Performs the three-stage table lookup and decodes the upper four bits of
/// the resulting `s3` byte as a `Gbp` tag.
/// Assumes `cp >> 8` is a valid index into `s1` -- the table sizes come from
/// the loaded data, so verify callers only pass code points it covers.
pub inline fn gbp(self: Self, cp: u21) Gbp {
    const block_offset = self.s1[cp >> 8];
    const record_index = self.s2[block_offset + (cp & 0xff)];
    const packed_props = self.s3[record_index];
    return @enumFromInt(packed_props >> 4);
}
78
/// Return the Indic syllable type of code point `cp`.
///
/// Performs the three-stage table lookup and decodes bits 1-3 of the
/// resulting `s3` byte as an `Indic` tag.
/// Assumes `cp >> 8` is a valid index into `s1` -- the table sizes come from
/// the loaded data, so verify callers only pass code points it covers.
pub inline fn indic(self: Self, cp: u21) Indic {
    const block_offset = self.s1[cp >> 8];
    const record_index = self.s2[block_offset + (cp & 0xff)];
    const packed_props = self.s3[record_index];
    return @enumFromInt((packed_props >> 1) & 0x7);
}
83
/// Report whether code point `cp` has the emoji property.
///
/// Performs the three-stage table lookup and tests the lowest bit of the
/// resulting `s3` byte.
/// Assumes `cp >> 8` is a valid index into `s1` -- the table sizes come from
/// the loaded data, so verify callers only pass code points it covers.
pub inline fn isEmoji(self: Self, cp: u21) bool {
    const block_offset = self.s1[cp >> 8];
    const record_index = self.s2[block_offset + (cp & 0xff)];
    const packed_props = self.s3[record_index];
    return packed_props & 1 == 1;
}