summaryrefslogtreecommitdiff
path: root/src/GenCatData.zig
diff options
context:
space:
mode:
Diffstat (limited to 'src/GenCatData.zig')
-rw-r--r--src/GenCatData.zig83
1 files changed, 83 insertions, 0 deletions
diff --git a/src/GenCatData.zig b/src/GenCatData.zig
new file mode 100644
index 0000000..5496e4e
--- /dev/null
+++ b/src/GenCatData.zig
@@ -0,0 +1,83 @@
1const std = @import("std");
2const builtin = @import("builtin");
3const compress = std.compress;
4const mem = std.mem;
5
/// Unicode General Category of a code point, named by its two-letter
/// abbreviation.
///
/// The declaration order is significant: `gc` turns an integer read from the
/// lookup tables into a `Gc` with `@enumFromInt`, so the position of each
/// variant must stay in step with the data being loaded. Do not reorder.
pub const Gc = enum(u5) {
    /// Control
    Cc,
    /// Format
    Cf,
    /// Unassigned
    Cn,
    /// Private Use
    Co,
    /// Surrogate
    Cs,
    /// Lowercase Letter
    Ll,
    /// Modifier Letter
    Lm,
    /// Other Letter
    Lo,
    /// Titlecase Letter
    Lt,
    /// Uppercase Letter
    Lu,
    /// Spacing Mark
    Mc,
    /// Enclosing Mark
    Me,
    /// Nonspacing Mark
    Mn,
    /// Decimal Number
    Nd,
    /// Letter Number
    Nl,
    /// Other Number
    No,
    /// Connector Punctuation
    Pc,
    /// Dash Punctuation
    Pd,
    /// Close Punctuation
    Pe,
    /// Final Punctuation
    Pf,
    /// Initial Punctuation
    Pi,
    /// Other Punctuation
    Po,
    /// Open Punctuation
    Ps,
    /// Currency Symbol
    Sc,
    /// Modifier Symbol
    Sk,
    /// Math Symbol
    Sm,
    /// Other Symbol
    So,
    /// Line Separator
    Zl,
    /// Paragraph Separator
    Zp,
    /// Space Separator
    Zs,
};
39
const Self = @This();

/// Allocator that owns `s1`, `s2` and `s3`; `deinit` frees them through it.
allocator: mem.Allocator,
/// Stage 1: indexed by `cp >> 8`; each entry is a base offset into `s2`.
s1: []u16 = undefined,
/// Stage 2: indexed by `s1[cp >> 8] + (cp & 0xff)`; each entry is an index
/// into `s3`.
s2: []u5 = undefined,
/// Stage 3: each entry is the integer value of a `Gc` variant.
s3: []u5 = undefined,
46
/// Builds the lookup tables by inflating the embedded `gencat` blob.
///
/// The decompressed stream is read as three length-prefixed arrays, in
/// order: `s1` (u16 length, u16 entries), `s2` (u16 length, one byte per
/// entry) and `s3` (u8 length, one byte per entry).
///
/// All three slices are allocated from `allocator`; the caller owns the
/// returned value and must release it with `deinit`. If any allocation or
/// read fails, every slice allocated so far is freed before the error is
/// returned.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.deflate.decompressor;
    const in_bytes = @embedFile("gencat");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = try decompressor(allocator, in_fbs.reader(), null);
    defer in_decomp.deinit();
    var reader = in_decomp.reader();

    // NOTE(review): multi-byte values are decoded in the target CPU's native
    // byte order; this presumably matches how the blob was written — confirm
    // for targets whose endianness differs from the generating host.
    const endian = builtin.cpu.arch.endian();

    var self = Self{ .allocator = allocator };

    const s1_len: u16 = try reader.readInt(u16, endian);
    self.s1 = try allocator.alloc(u16, s1_len);
    // Every read/alloc below can fail; release what we already own.
    errdefer allocator.free(self.s1);
    for (self.s1) |*entry| entry.* = try reader.readInt(u16, endian);

    const s2_len: u16 = try reader.readInt(u16, endian);
    self.s2 = try allocator.alloc(u5, s2_len);
    errdefer allocator.free(self.s2);
    // Entries are stored one per byte and narrowed to u5.
    for (self.s2) |*entry| entry.* = @intCast(try reader.readInt(u8, endian));

    // Unlike the other two tables, this length prefix is a single byte.
    const s3_len: u16 = try reader.readInt(u8, endian);
    self.s3 = try allocator.alloc(u5, s3_len);
    errdefer allocator.free(self.s3);
    for (self.s3) |*entry| entry.* = @intCast(try reader.readInt(u8, endian));

    return self;
}
73
/// Frees the three lookup slices (`s1`, `s2`, `s3`) through the allocator
/// stored in `self`. The slices must not be used after this call.
pub fn deinit(self: *Self) void {
    const allocator = self.allocator;
    allocator.free(self.s1);
    allocator.free(self.s2);
    allocator.free(self.s3);
}
79
/// Returns the General Category of code point `cp`.
///
/// The lookup walks three tables: the high bits of `cp` (`cp >> 8`) select a
/// base offset from `s1`, the low 8 bits are added to it to index `s2`, and
/// the value found there indexes `s3`, whose entry is converted to a `Gc`.
/// No range check on `cp` is made here beyond ordinary slice indexing.
pub inline fn gc(self: Self, cp: u21) Gc {
    const block_base = self.s1[cp >> 8];
    const within_block = cp & 0xff;
    const category_index = self.s2[block_base + within_block];
    return @enumFromInt(self.s3[category_index]);
}