summaryrefslogtreecommitdiff
path: root/src/ScriptsData.zig
diff options
context:
space:
mode:
authorGravatar Sam Atman2025-04-30 15:32:34 -0400
committerGravatar Sam Atman2025-04-30 15:32:34 -0400
commit958c13ba442e7077a50d7163fdeb9bba378f95c2 (patch)
tree0727fd03ea2344ebbad842daa05b55ea0a143a6c /src/ScriptsData.zig
parentRemove FoldData, make CaseFolding (diff)
downloadzg-958c13ba442e7077a50d7163fdeb9bba378f95c2.tar.gz
zg-958c13ba442e7077a50d7163fdeb9bba378f95c2.tar.xz
zg-958c13ba442e7077a50d7163fdeb9bba378f95c2.zip
Rest of the Renamings
These get different names, but don't otherwise change.
Diffstat (limited to 'src/ScriptsData.zig')
-rw-r--r--src/ScriptsData.zig227
1 files changed, 0 insertions, 227 deletions
diff --git a/src/ScriptsData.zig b/src/ScriptsData.zig
deleted file mode 100644
index 4ad8549..0000000
--- a/src/ScriptsData.zig
+++ /dev/null
@@ -1,227 +0,0 @@
1const std = @import("std");
2const builtin = @import("builtin");
3const compress = std.compress;
4const mem = std.mem;
5const testing = std.testing;
6
/// Unicode script identifiers.
///
/// Declaration order is significant: `script` turns a byte from the lookup
/// tables into a member with `@enumFromInt`, so each member's ordinal must
/// stay exactly where it is. `none` holds ordinal 0, which `script` reports
/// as `null` rather than returning this member.
pub const Script = enum {
    none,
    Adlam,
    Ahom,
    Anatolian_Hieroglyphs,
    Arabic,
    Armenian,
    Avestan,
    Balinese,
    Bamum,
    Bassa_Vah,
    Batak,
    Bengali,
    Bhaiksuki,
    Bopomofo,
    Brahmi,
    Braille,
    Buginese,
    Buhid,
    Canadian_Aboriginal,
    Carian,
    Caucasian_Albanian,
    Chakma,
    Cham,
    Cherokee,
    Chorasmian,
    Common,
    Coptic,
    Cuneiform,
    Cypriot,
    Cypro_Minoan,
    Cyrillic,
    Deseret,
    Devanagari,
    Dives_Akuru,
    Dogra,
    Duployan,
    Egyptian_Hieroglyphs,
    Elbasan,
    Elymaic,
    Ethiopic,
    Georgian,
    Glagolitic,
    Gothic,
    Grantha,
    Greek,
    Gujarati,
    Gunjala_Gondi,
    Gurmukhi,
    Han,
    Hangul,
    Hanifi_Rohingya,
    Hanunoo,
    Hatran,
    Hebrew,
    Hiragana,
    Imperial_Aramaic,
    Inherited,
    Inscriptional_Pahlavi,
    Inscriptional_Parthian,
    Javanese,
    Kaithi,
    Kannada,
    Katakana,
    Kawi,
    Kayah_Li,
    Kharoshthi,
    Khitan_Small_Script,
    Khmer,
    Khojki,
    Khudawadi,
    Lao,
    Latin,
    Lepcha,
    Limbu,
    Linear_A,
    Linear_B,
    Lisu,
    Lycian,
    Lydian,
    Mahajani,
    Makasar,
    Malayalam,
    Mandaic,
    Manichaean,
    Marchen,
    Masaram_Gondi,
    Medefaidrin,
    Meetei_Mayek,
    Mende_Kikakui,
    Meroitic_Cursive,
    Meroitic_Hieroglyphs,
    Miao,
    Modi,
    Mongolian,
    Mro,
    Multani,
    Myanmar,
    Nabataean,
    Nag_Mundari,
    Nandinagari,
    New_Tai_Lue,
    Newa,
    Nko,
    Nushu,
    Nyiakeng_Puachue_Hmong,
    Ogham,
    Ol_Chiki,
    Old_Hungarian,
    Old_Italic,
    Old_North_Arabian,
    Old_Permic,
    Old_Persian,
    Old_Sogdian,
    Old_South_Arabian,
    Old_Turkic,
    Old_Uyghur,
    Oriya,
    Osage,
    Osmanya,
    Pahawh_Hmong,
    Palmyrene,
    Pau_Cin_Hau,
    Phags_Pa,
    Phoenician,
    Psalter_Pahlavi,
    Rejang,
    Runic,
    Samaritan,
    Saurashtra,
    Sharada,
    Shavian,
    Siddham,
    SignWriting,
    Sinhala,
    Sogdian,
    Sora_Sompeng,
    Soyombo,
    Sundanese,
    Syloti_Nagri,
    Syriac,
    Tagalog,
    Tagbanwa,
    Tai_Le,
    Tai_Tham,
    Tai_Viet,
    Takri,
    Tamil,
    Tangsa,
    Tangut,
    Telugu,
    Thaana,
    Thai,
    Tibetan,
    Tifinagh,
    Tirhuta,
    Toto,
    Ugaritic,
    Vai,
    Vithkuqi,
    Wancho,
    Warang_Citi,
    Yezidi,
    Yi,
    Zanabazar_Square,
};
174
const Self = @This();

/// First lookup stage: indexed by `cp >> 8`; each entry is a base offset
/// into `s2`.
s1: []u16 = undefined,
/// Second lookup stage: indexed by an `s1` entry plus the low byte of the
/// code point; each entry is an index into `s3`.
s2: []u8 = undefined,
/// Third lookup stage: each entry is the ordinal of a `Script` member
/// (0 is reported by `script` as "no script").
s3: []u8 = undefined,
180
/// Build the three lookup tables by inflating the embedded `scripts` blob.
///
/// The decompressed stream is consumed in this order:
///   1. a `u16` entry count, then that many `u16` values  -> `s1`
///   2. a `u16` byte count, then that many bytes          -> `s2`
///   3. a `u8` byte count, then that many bytes           -> `s3`
/// Multi-byte integers are read in the target CPU's native byte order.
///
/// Every table is allocated from `allocator`; on success the caller must
/// release them by calling `deinit` with the same allocator. On any failure
/// the tables allocated so far are freed before the error is returned.
///
/// Errors: allocation failure, any error surfaced by the decompressor, and
/// `error.EndOfStream` when the stream ends before a table is fully read.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.flate.inflate.decompressor;
    const in_bytes = @embedFile("scripts");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = decompressor(.raw, in_fbs.reader());
    var reader = in_decomp.reader();

    const endian = builtin.cpu.arch.endian();

    var self = Self{};

    const s1_len: u16 = try reader.readInt(u16, endian);
    self.s1 = try allocator.alloc(u16, s1_len);
    errdefer allocator.free(self.s1);
    for (self.s1) |*entry| entry.* = try reader.readInt(u16, endian);

    const s2_len: u16 = try reader.readInt(u16, endian);
    self.s2 = try allocator.alloc(u8, s2_len);
    errdefer allocator.free(self.s2);
    // `readNoEof` rather than `readAll`: a short read must be an error,
    // otherwise the tail of the table would be left uninitialised.
    try reader.readNoEof(self.s2);

    // The length prefix of the third table is a single byte in the stream.
    const s3_len: u8 = try reader.readInt(u8, endian);
    self.s3 = try allocator.alloc(u8, s3_len);
    errdefer allocator.free(self.s3);
    try reader.readNoEof(self.s3);

    return self;
}
209
/// Release the three lookup tables.
///
/// `allocator` is passed straight to `free` for `s1`, `s2` and `s3`, so it
/// must be the allocator that `init` used to create them.
pub fn deinit(self: *const Self, allocator: mem.Allocator) void {
    const tables = self.*;
    allocator.free(tables.s1);
    allocator.free(tables.s2);
    allocator.free(tables.s3);
}
215
/// Look up the `Script` of code point `cp`.
///
/// Walks the three tables in turn: the high bits of `cp` (`cp >> 8`) pick a
/// base offset from `s1`, the low byte is added to it to index `s2`, and the
/// resulting value indexes `s3`. Returns `null` when the final table entry
/// is 0; otherwise the entry is converted to the `Script` member with that
/// ordinal.
pub fn script(self: Self, cp: u21) ?Script {
    const block_base = self.s1[cp >> 8];
    const low_byte = cp & 0xff;
    const stage3_index = self.s2[block_base + low_byte];
    const ordinal = self.s3[stage3_index];
    return if (ordinal == 0) null else @as(Script, @enumFromInt(ordinal));
}
222
// Smoke test: load the embedded tables and confirm that 'A' maps to Latin.
test "script" {
    const data = try init(testing.allocator);
    defer data.deinit(testing.allocator);

    const found = data.script('A').?;
    try testing.expectEqual(Script.Latin, found);
}