summaryrefslogtreecommitdiff
path: root/src/ScriptsData.zig
diff options
context:
space:
mode:
authorGravatar Jose Colon Rodriguez2024-03-27 21:52:02 -0400
committerGravatar Jose Colon Rodriguez2024-03-27 21:52:02 -0400
commit4ce891a8ce5336da39180964792110e131756cdd (patch)
treeb4ff0180157bb49e15d2c36f2cf0cdaab1a24535 /src/ScriptsData.zig
parentFriendly general category methods (diff)
downloadzg-4ce891a8ce5336da39180964792110e131756cdd.tar.gz
zg-4ce891a8ce5336da39180964792110e131756cdd.tar.xz
zg-4ce891a8ce5336da39180964792110e131756cdd.zip
ScriptsData and made all Datas const
Diffstat (limited to 'src/ScriptsData.zig')
-rw-r--r--src/ScriptsData.zig226
1 files changed, 226 insertions, 0 deletions
diff --git a/src/ScriptsData.zig b/src/ScriptsData.zig
new file mode 100644
index 0000000..ac1c46a
--- /dev/null
+++ b/src/ScriptsData.zig
@@ -0,0 +1,226 @@
1const std = @import("std");
2const builtin = @import("builtin");
3const compress = std.compress;
4const mem = std.mem;
5const testing = std.testing;
6
/// Script identifiers returned by `script`.
///
/// The member names appear to follow the Unicode `Script` property values
/// (TODO confirm against the data generator). Declaration order matters:
/// `script` converts a byte from the lookup table straight into this enum
/// with `@enumFromInt`, so members must not be reordered or inserted in the
/// middle. `none` is declared first and therefore has the integer value 0;
/// `script` reports a table byte of 0 as `null` rather than as `.none`.
pub const Script = enum(u8) {
    none,
    Adlam,
    Ahom,
    Anatolian_Hieroglyphs,
    Arabic,
    Armenian,
    Avestan,
    Balinese,
    Bamum,
    Bassa_Vah,
    Batak,
    Bengali,
    Bhaiksuki,
    Bopomofo,
    Brahmi,
    Braille,
    Buginese,
    Buhid,
    Canadian_Aboriginal,
    Carian,
    Caucasian_Albanian,
    Chakma,
    Cham,
    Cherokee,
    Chorasmian,
    Common,
    Coptic,
    Cuneiform,
    Cypriot,
    Cypro_Minoan,
    Cyrillic,
    Deseret,
    Devanagari,
    Dives_Akuru,
    Dogra,
    Duployan,
    Egyptian_Hieroglyphs,
    Elbasan,
    Elymaic,
    Ethiopic,
    Georgian,
    Glagolitic,
    Gothic,
    Grantha,
    Greek,
    Gujarati,
    Gunjala_Gondi,
    Gurmukhi,
    Han,
    Hangul,
    Hanifi_Rohingya,
    Hanunoo,
    Hatran,
    Hebrew,
    Hiragana,
    Imperial_Aramaic,
    Inherited,
    Inscriptional_Pahlavi,
    Inscriptional_Parthian,
    Javanese,
    Kaithi,
    Kannada,
    Katakana,
    Kawi,
    Kayah_Li,
    Kharoshthi,
    Khitan_Small_Script,
    Khmer,
    Khojki,
    Khudawadi,
    Lao,
    Latin,
    Lepcha,
    Limbu,
    Linear_A,
    Linear_B,
    Lisu,
    Lycian,
    Lydian,
    Mahajani,
    Makasar,
    Malayalam,
    Mandaic,
    Manichaean,
    Marchen,
    Masaram_Gondi,
    Medefaidrin,
    Meetei_Mayek,
    Mende_Kikakui,
    Meroitic_Cursive,
    Meroitic_Hieroglyphs,
    Miao,
    Modi,
    Mongolian,
    Mro,
    Multani,
    Myanmar,
    Nabataean,
    Nag_Mundari,
    Nandinagari,
    New_Tai_Lue,
    Newa,
    Nko,
    Nushu,
    Nyiakeng_Puachue_Hmong,
    Ogham,
    Ol_Chiki,
    Old_Hungarian,
    Old_Italic,
    Old_North_Arabian,
    Old_Permic,
    Old_Persian,
    Old_Sogdian,
    Old_South_Arabian,
    Old_Turkic,
    Old_Uyghur,
    Oriya,
    Osage,
    Osmanya,
    Pahawh_Hmong,
    Palmyrene,
    Pau_Cin_Hau,
    Phags_Pa,
    Phoenician,
    Psalter_Pahlavi,
    Rejang,
    Runic,
    Samaritan,
    Saurashtra,
    Sharada,
    Shavian,
    Siddham,
    SignWriting,
    Sinhala,
    Sogdian,
    Sora_Sompeng,
    Soyombo,
    Sundanese,
    Syloti_Nagri,
    Syriac,
    Tagalog,
    Tagbanwa,
    Tai_Le,
    Tai_Tham,
    Tai_Viet,
    Takri,
    Tamil,
    Tangsa,
    Tangut,
    Telugu,
    Thaana,
    Thai,
    Tibetan,
    Tifinagh,
    Tirhuta,
    Toto,
    Ugaritic,
    Vai,
    Vithkuqi,
    Wancho,
    Warang_Citi,
    Yezidi,
    Yi,
    Zanabazar_Square,
};
174
const Self = @This();

/// Allocator used by `init` to create `s1`, `s2` and `s3`, and by `deinit`
/// to free them.
allocator: mem.Allocator,
/// First lookup stage: `script` indexes it with `cp >> 8`.
/// Left `undefined` until `init` fills it.
s1: []u16 = undefined,
/// Second lookup stage: `script` indexes it with the `s1` entry plus the low
/// byte of the code point. Left `undefined` until `init` fills it.
s2: []u8 = undefined,
/// Third lookup stage: `script` indexes it with the `s2` entry and converts
/// the resulting byte to a `Script`. Left `undefined` until `init` fills it.
s3: []u8 = undefined,
181
/// Decompresses the embedded `scripts` blob and loads its three lookup stages.
///
/// Stream layout as read here, all integers in the target CPU's native byte
/// order:
///   1. u16 element count, followed by that many u16 values  -> `s1`
///   2. u16 byte length, followed by that many bytes         -> `s2`
///   3. u8 byte length, followed by that many bytes          -> `s3`
///
/// On success the caller owns the returned value and must call `deinit`.
/// On failure every stage allocated so far is freed before the error is
/// returned, and a stream that ends early is reported as `error.EndOfStream`
/// instead of leaving part of a table uninitialised.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.deflate.decompressor;
    const in_bytes = @embedFile("scripts");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = try decompressor(allocator, in_fbs.reader(), null);
    defer in_decomp.deinit();
    const reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();

    // Each table gets an errdefer right after allocation so that a failure
    // in any later `try` does not leak it.
    const s1_len: u16 = try reader.readInt(u16, endian);
    const s1 = try allocator.alloc(u16, s1_len);
    errdefer allocator.free(s1);
    for (s1) |*entry| entry.* = try reader.readInt(u16, endian);

    const s2_len: u16 = try reader.readInt(u16, endian);
    const s2 = try allocator.alloc(u8, s2_len);
    errdefer allocator.free(s2);
    // readNoEof fails on a short read; a plain readAll would silently leave
    // the tail of the buffer undefined.
    try reader.readNoEof(s2);

    // NOTE(review): the s3 length prefix is a single byte, unlike the u16
    // prefixes above. Presumably this matches what the data generator
    // writes; confirm before changing.
    const s3_len: u16 = try reader.readInt(u8, endian);
    const s3 = try allocator.alloc(u8, s3_len);
    errdefer allocator.free(s3);
    try reader.readNoEof(s3);

    return Self{
        .allocator = allocator,
        .s1 = s1,
        .s2 = s2,
        .s3 = s3,
    };
}
208
/// Releases the three lookup tables through the allocator stored in `self`.
/// The slices must not be used after this call.
pub fn deinit(self: *const Self) void {
    const owner = self.allocator;
    owner.free(self.s1);
    owner.free(self.s2);
    owner.free(self.s3);
}
214
/// Looks up the `Script` for code point `cp`.
///
/// Walks the three stages: `s1[cp >> 8]` gives a base offset, the low byte
/// of `cp` is added to it to index `s2`, and that entry indexes `s3`.
/// Returns `null` when the final byte is 0; otherwise the byte is converted
/// to a `Script` by integer value. No range check is done on `cp` here, so
/// the tables must cover every index the walk produces.
pub fn script(self: Self, cp: u21) ?Script {
    const block_base = self.s1[cp >> 8];
    const low_byte = cp & 0xff;
    const ordinal = self.s3[self.s2[block_base + low_byte]];

    if (ordinal != 0) {
        const found: Script = @enumFromInt(ordinal);
        return found;
    }
    return null;
}
221
// Loads the embedded tables and checks that 'A' resolves to Latin.
// `testing.allocator` fails the test if `deinit` leaves anything allocated.
test "script" {
    const data = try init(testing.allocator);
    defer data.deinit();

    const script_of_a = data.script('A').?;
    try testing.expectEqual(Script.Latin, script_of_a);
}