summaryrefslogtreecommitdiff
path: root/src/GenCatData.zig
diff options
context:
space:
mode:
Diffstat (limited to 'src/GenCatData.zig')
-rw-r--r--src/GenCatData.zig170
1 files changed, 0 insertions, 170 deletions
diff --git a/src/GenCatData.zig b/src/GenCatData.zig
deleted file mode 100644
index a69f7a2..0000000
--- a/src/GenCatData.zig
+++ /dev/null
@@ -1,170 +0,0 @@
1const std = @import("std");
2const builtin = @import("builtin");
3const compress = std.compress;
4const mem = std.mem;
5
/// Unicode General Category values.
///
/// The declaration order fixes each member's integer tag, and `gc` converts a
/// stored table value into a member with `@enumFromInt`, so members must not
/// be reordered.
pub const Gc = enum {
    // Other
    Cc, // Control
    Cf, // Format
    Cn, // Unassigned
    Co, // Private Use
    Cs, // Surrogate

    // Letter
    Ll, // Lowercase
    Lm, // Modifier
    Lo, // Other
    Lu, // Uppercase
    Lt, // Titlecase

    // Mark
    Mc, // Spacing Combining
    Me, // Enclosing
    Mn, // Non-Spacing

    // Number
    Nd, // Decimal Digit
    Nl, // Letter
    No, // Other

    // Punctuation
    Pc, // Connector
    Pd, // Dash
    Pe, // Close
    Pf, // Final quote (may behave like Ps or Pe depending on usage)
    Pi, // Initial quote (may behave like Ps or Pe depending on usage)
    Po, // Other
    Ps, // Open

    // Symbol
    Sc, // Currency
    Sk, // Modifier
    Sm, // Math
    So, // Other

    // Separator
    Zl, // Line
    Zp, // Paragraph
    Zs, // Space
};
39
/// First lookup stage: `gc` indexes this with `cp >> 8`; the entry is the
/// base offset added to the low byte of `cp` to index `s2`.
s1: []u16 = undefined,
/// Second lookup stage: each entry is an index into `s3`.
s2: []u5 = undefined,
/// Third lookup stage: each entry is converted to a `Gc` member by `gc`.
s3: []u5 = undefined,

/// This file is itself the struct type; `Self` names it.
const Self = @This();
45
/// Builds the three lookup tables from the embedded `gencat` file.
///
/// The embedded bytes are inflated with the `.raw` flate container and read
/// in the target CPU's native byte order, in this sequence:
///   1. `u16` count, then that many `u16` entries for `s1`;
///   2. `u16` count, then that many `u8` entries for `s2`;
///   3. `u8` count, then that many `u8` entries for `s3`.
/// Entries for `s2` and `s3` are narrowed to `u5` with `@intCast`.
///
/// Returns any allocation or read error. Tables allocated before a failure
/// are released again. On success the tables are owned by the returned value
/// and are freed by `deinit` when given the same allocator.
pub fn init(allocator: mem.Allocator) !Self {
    const endian = builtin.cpu.arch.endian();

    const compressed = @embedFile("gencat");
    var source = std.io.fixedBufferStream(compressed);
    var inflater = compress.flate.inflate.decompressor(.raw, source.reader());
    var reader = inflater.reader();

    var self = Self{};

    // Stage 1 table.
    const stage1_count: u16 = try reader.readInt(u16, endian);
    self.s1 = try allocator.alloc(u16, stage1_count);
    errdefer allocator.free(self.s1);
    for (self.s1) |*entry| entry.* = try reader.readInt(u16, endian);

    // Stage 2 table.
    const stage2_count: u16 = try reader.readInt(u16, endian);
    self.s2 = try allocator.alloc(u5, stage2_count);
    errdefer allocator.free(self.s2);
    for (self.s2) |*entry| entry.* = @intCast(try reader.readInt(u8, endian));

    // Stage 3 table; its count is stored as a single byte.
    const stage3_count: u16 = try reader.readInt(u8, endian);
    self.s3 = try allocator.alloc(u5, stage3_count);
    errdefer allocator.free(self.s3);
    for (self.s3) |*entry| entry.* = @intCast(try reader.readInt(u8, endian));

    return self;
}
74
/// Frees the `s1`, `s2` and `s3` tables, in that order, using `allocator`.
pub fn deinit(self: *const Self, allocator: mem.Allocator) void {
    // The tables have different element types, so the loop is unrolled at
    // compile time over a tuple of the three slices.
    inline for (.{ self.s1, self.s2, self.s3 }) |table| {
        allocator.free(table);
    }
}
80
/// Returns the General Category of `cp` by walking the three tables.
pub fn gc(self: Self, cp: u21) Gc {
    // The high bits of `cp` select a base offset from the first stage.
    const block_base = self.s1[cp >> 8];
    // Base offset plus the low byte of `cp` selects the second-stage entry.
    const category_slot = self.s2[block_base + (cp & 0xff)];
    // The third-stage entry is the integer tag of the resulting `Gc` member.
    return @enumFromInt(self.s3[category_slot]);
}
85
/// True if `cp` has a C ("Other") general category: Cc, Cf, Cn, Co or Cs.
pub fn isControl(self: Self, cp: u21) bool {
    const category = self.gc(cp);
    return category == .Cc or
        category == .Cf or
        category == .Cn or
        category == .Co or
        category == .Cs;
}
98
/// True if `cp` has an L (Letter) general category: Ll, Lm, Lo, Lu or Lt.
pub fn isLetter(self: Self, cp: u21) bool {
    const category = self.gc(cp);
    return category == .Ll or
        category == .Lm or
        category == .Lo or
        category == .Lu or
        category == .Lt;
}
111
/// True if `cp` has an M (Mark) general category: Mc, Me or Mn.
pub fn isMark(self: Self, cp: u21) bool {
    const category = self.gc(cp);
    return category == .Mc or category == .Me or category == .Mn;
}
122
/// True if `cp` has an N (Number) general category: Nd, Nl or No.
pub fn isNumber(self: Self, cp: u21) bool {
    const category = self.gc(cp);
    return category == .Nd or category == .Nl or category == .No;
}
133
/// True if `cp` has a P (Punctuation) general category: Pc, Pd, Pe, Pf, Pi,
/// Po or Ps.
pub fn isPunctuation(self: Self, cp: u21) bool {
    const category = self.gc(cp);
    return category == .Pc or
        category == .Pd or
        category == .Pe or
        category == .Pf or
        category == .Pi or
        category == .Po or
        category == .Ps;
}
148
/// True if `cp` has an S (Symbol) general category: Sc, Sk, Sm or So.
pub fn isSymbol(self: Self, cp: u21) bool {
    const category = self.gc(cp);
    return category == .Sc or
        category == .Sk or
        category == .Sm or
        category == .So;
}
160
/// True if `cp` has a Z (Separator) general category: Zl, Zp or Zs.
pub fn isSeparator(self: Self, cp: u21) bool {
    const category = self.gc(cp);
    return category == .Zl or category == .Zp or category == .Zs;
}