diff options
| -rw-r--r-- | build.zig | 28 | ||||
| -rw-r--r-- | codegen/scripts.zig | 309 | ||||
| -rw-r--r-- | src/CanonData.zig | 2 | ||||
| -rw-r--r-- | src/CaseData.zig | 10 | ||||
| -rw-r--r-- | src/CaseFold.zig | 8 | ||||
| -rw-r--r-- | src/CombiningData.zig | 2 | ||||
| -rw-r--r-- | src/CompatData.zig | 2 | ||||
| -rw-r--r-- | src/DisplayWidth.zig | 10 | ||||
| -rw-r--r-- | src/FoldData.zig | 2 | ||||
| -rw-r--r-- | src/GenCatData.zig | 2 | ||||
| -rw-r--r-- | src/GraphemeData.zig | 2 | ||||
| -rw-r--r-- | src/HangulData.zig | 2 | ||||
| -rw-r--r-- | src/NormData.zig | 2 | ||||
| -rw-r--r-- | src/NormPropsData.zig | 2 | ||||
| -rw-r--r-- | src/Normalize.zig | 58 | ||||
| -rw-r--r-- | src/NumericData.zig | 4 | ||||
| -rw-r--r-- | src/ScriptsData.zig | 226 | ||||
| -rw-r--r-- | src/WidthData.zig | 2 | ||||
| -rw-r--r-- | src/grapheme.zig | 4 |
19 files changed, 615 insertions, 62 deletions
| @@ -137,6 +137,15 @@ pub fn build(b: *std.Build) void { | |||
| 137 | const run_lower_gen_exe = b.addRunArtifact(lower_gen_exe); | 137 | const run_lower_gen_exe = b.addRunArtifact(lower_gen_exe); |
| 138 | const lower_gen_out = run_lower_gen_exe.addOutputFileArg("lower.bin.z"); | 138 | const lower_gen_out = run_lower_gen_exe.addOutputFileArg("lower.bin.z"); |
| 139 | 139 | ||
| 140 | const scripts_gen_exe = b.addExecutable(.{ | ||
| 141 | .name = "scripts", | ||
| 142 | .root_source_file = .{ .path = "codegen/scripts.zig" }, | ||
| 143 | .target = b.host, | ||
| 144 | .optimize = .Debug, | ||
| 145 | }); | ||
| 146 | const run_scripts_gen_exe = b.addRunArtifact(scripts_gen_exe); | ||
| 147 | const scripts_gen_out = run_scripts_gen_exe.addOutputFileArg("scripts.bin.z"); | ||
| 148 | |||
| 140 | // Modules we provide | 149 | // Modules we provide |
| 141 | // Code points | 150 | // Code points |
| 142 | const code_point = b.addModule("code_point", .{ | 151 | const code_point = b.addModule("code_point", .{ |
| @@ -287,14 +296,22 @@ pub fn build(b: *std.Build) void { | |||
| 287 | case_data.addAnonymousImport("upper", .{ .root_source_file = upper_gen_out }); | 296 | case_data.addAnonymousImport("upper", .{ .root_source_file = upper_gen_out }); |
| 288 | case_data.addAnonymousImport("lower", .{ .root_source_file = lower_gen_out }); | 297 | case_data.addAnonymousImport("lower", .{ .root_source_file = lower_gen_out }); |
| 289 | 298 | ||
| 299 | // Scripts | ||
| 300 | const scripts_data = b.addModule("ScriptsData", .{ | ||
| 301 | .root_source_file = .{ .path = "src/ScriptsData.zig" }, | ||
| 302 | .target = target, | ||
| 303 | .optimize = optimize, | ||
| 304 | }); | ||
| 305 | scripts_data.addAnonymousImport("scripts", .{ .root_source_file = scripts_gen_out }); | ||
| 306 | |||
| 290 | // Tests | 307 | // Tests |
| 291 | const exe_unit_tests = b.addTest(.{ | 308 | const exe_unit_tests = b.addTest(.{ |
| 292 | .root_source_file = .{ .path = "src/CaseData.zig" }, | 309 | .root_source_file = .{ .path = "src/ScriptsData.zig" }, |
| 293 | .target = target, | 310 | .target = target, |
| 294 | .optimize = optimize, | 311 | .optimize = optimize, |
| 295 | }); | 312 | }); |
| 296 | // exe_unit_tests.root_module.addImport("ascii", ascii); | 313 | // exe_unit_tests.root_module.addImport("ascii", ascii); |
| 297 | exe_unit_tests.root_module.addImport("code_point", code_point); | 314 | // exe_unit_tests.root_module.addImport("code_point", code_point); |
| 298 | // exe_unit_tests.root_module.addImport("GraphemeData", grapheme_data); | 315 | // exe_unit_tests.root_module.addImport("GraphemeData", grapheme_data); |
| 299 | // exe_unit_tests.root_module.addImport("grapheme", grapheme); | 316 | // exe_unit_tests.root_module.addImport("grapheme", grapheme); |
| 300 | // exe_unit_tests.root_module.addImport("ziglyph", ziglyph.module("ziglyph")); | 317 | // exe_unit_tests.root_module.addImport("ziglyph", ziglyph.module("ziglyph")); |
| @@ -304,9 +321,10 @@ pub fn build(b: *std.Build) void { | |||
| 304 | // exe_unit_tests.root_module.addImport("Normalize", norm); | 321 | // exe_unit_tests.root_module.addImport("Normalize", norm); |
| 305 | // exe_unit_tests.root_module.addImport("FoldData", fold_data); | 322 | // exe_unit_tests.root_module.addImport("FoldData", fold_data); |
| 306 | // exe_unit_tests.root_module.addAnonymousImport("numeric", .{ .root_source_file = num_gen_out }); | 323 | // exe_unit_tests.root_module.addAnonymousImport("numeric", .{ .root_source_file = num_gen_out }); |
| 307 | exe_unit_tests.root_module.addAnonymousImport("case_prop", .{ .root_source_file = case_prop_gen_out }); | 324 | // exe_unit_tests.root_module.addAnonymousImport("case_prop", .{ .root_source_file = case_prop_gen_out }); |
| 308 | exe_unit_tests.root_module.addAnonymousImport("upper", .{ .root_source_file = upper_gen_out }); | 325 | // exe_unit_tests.root_module.addAnonymousImport("upper", .{ .root_source_file = upper_gen_out }); |
| 309 | exe_unit_tests.root_module.addAnonymousImport("lower", .{ .root_source_file = lower_gen_out }); | 326 | // exe_unit_tests.root_module.addAnonymousImport("lower", .{ .root_source_file = lower_gen_out }); |
| 327 | exe_unit_tests.root_module.addAnonymousImport("scripts", .{ .root_source_file = scripts_gen_out }); | ||
| 310 | // exe_unit_tests.filter = "nfd !ASCII"; | 328 | // exe_unit_tests.filter = "nfd !ASCII"; |
| 311 | 329 | ||
| 312 | const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests); | 330 | const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests); |
diff --git a/codegen/scripts.zig b/codegen/scripts.zig new file mode 100644 index 0000000..e985c1e --- /dev/null +++ b/codegen/scripts.zig | |||
| @@ -0,0 +1,309 @@ | |||
| 1 | const std = @import("std"); | ||
| 2 | const builtin = @import("builtin"); | ||
| 3 | |||
| 4 | const Script = enum { | ||
| 5 | none, | ||
| 6 | Adlam, | ||
| 7 | Ahom, | ||
| 8 | Anatolian_Hieroglyphs, | ||
| 9 | Arabic, | ||
| 10 | Armenian, | ||
| 11 | Avestan, | ||
| 12 | Balinese, | ||
| 13 | Bamum, | ||
| 14 | Bassa_Vah, | ||
| 15 | Batak, | ||
| 16 | Bengali, | ||
| 17 | Bhaiksuki, | ||
| 18 | Bopomofo, | ||
| 19 | Brahmi, | ||
| 20 | Braille, | ||
| 21 | Buginese, | ||
| 22 | Buhid, | ||
| 23 | Canadian_Aboriginal, | ||
| 24 | Carian, | ||
| 25 | Caucasian_Albanian, | ||
| 26 | Chakma, | ||
| 27 | Cham, | ||
| 28 | Cherokee, | ||
| 29 | Chorasmian, | ||
| 30 | Common, | ||
| 31 | Coptic, | ||
| 32 | Cuneiform, | ||
| 33 | Cypriot, | ||
| 34 | Cypro_Minoan, | ||
| 35 | Cyrillic, | ||
| 36 | Deseret, | ||
| 37 | Devanagari, | ||
| 38 | Dives_Akuru, | ||
| 39 | Dogra, | ||
| 40 | Duployan, | ||
| 41 | Egyptian_Hieroglyphs, | ||
| 42 | Elbasan, | ||
| 43 | Elymaic, | ||
| 44 | Ethiopic, | ||
| 45 | Georgian, | ||
| 46 | Glagolitic, | ||
| 47 | Gothic, | ||
| 48 | Grantha, | ||
| 49 | Greek, | ||
| 50 | Gujarati, | ||
| 51 | Gunjala_Gondi, | ||
| 52 | Gurmukhi, | ||
| 53 | Han, | ||
| 54 | Hangul, | ||
| 55 | Hanifi_Rohingya, | ||
| 56 | Hanunoo, | ||
| 57 | Hatran, | ||
| 58 | Hebrew, | ||
| 59 | Hiragana, | ||
| 60 | Imperial_Aramaic, | ||
| 61 | Inherited, | ||
| 62 | Inscriptional_Pahlavi, | ||
| 63 | Inscriptional_Parthian, | ||
| 64 | Javanese, | ||
| 65 | Kaithi, | ||
| 66 | Kannada, | ||
| 67 | Katakana, | ||
| 68 | Kawi, | ||
| 69 | Kayah_Li, | ||
| 70 | Kharoshthi, | ||
| 71 | Khitan_Small_Script, | ||
| 72 | Khmer, | ||
| 73 | Khojki, | ||
| 74 | Khudawadi, | ||
| 75 | Lao, | ||
| 76 | Latin, | ||
| 77 | Lepcha, | ||
| 78 | Limbu, | ||
| 79 | Linear_A, | ||
| 80 | Linear_B, | ||
| 81 | Lisu, | ||
| 82 | Lycian, | ||
| 83 | Lydian, | ||
| 84 | Mahajani, | ||
| 85 | Makasar, | ||
| 86 | Malayalam, | ||
| 87 | Mandaic, | ||
| 88 | Manichaean, | ||
| 89 | Marchen, | ||
| 90 | Masaram_Gondi, | ||
| 91 | Medefaidrin, | ||
| 92 | Meetei_Mayek, | ||
| 93 | Mende_Kikakui, | ||
| 94 | Meroitic_Cursive, | ||
| 95 | Meroitic_Hieroglyphs, | ||
| 96 | Miao, | ||
| 97 | Modi, | ||
| 98 | Mongolian, | ||
| 99 | Mro, | ||
| 100 | Multani, | ||
| 101 | Myanmar, | ||
| 102 | Nabataean, | ||
| 103 | Nag_Mundari, | ||
| 104 | Nandinagari, | ||
| 105 | New_Tai_Lue, | ||
| 106 | Newa, | ||
| 107 | Nko, | ||
| 108 | Nushu, | ||
| 109 | Nyiakeng_Puachue_Hmong, | ||
| 110 | Ogham, | ||
| 111 | Ol_Chiki, | ||
| 112 | Old_Hungarian, | ||
| 113 | Old_Italic, | ||
| 114 | Old_North_Arabian, | ||
| 115 | Old_Permic, | ||
| 116 | Old_Persian, | ||
| 117 | Old_Sogdian, | ||
| 118 | Old_South_Arabian, | ||
| 119 | Old_Turkic, | ||
| 120 | Old_Uyghur, | ||
| 121 | Oriya, | ||
| 122 | Osage, | ||
| 123 | Osmanya, | ||
| 124 | Pahawh_Hmong, | ||
| 125 | Palmyrene, | ||
| 126 | Pau_Cin_Hau, | ||
| 127 | Phags_Pa, | ||
| 128 | Phoenician, | ||
| 129 | Psalter_Pahlavi, | ||
| 130 | Rejang, | ||
| 131 | Runic, | ||
| 132 | Samaritan, | ||
| 133 | Saurashtra, | ||
| 134 | Sharada, | ||
| 135 | Shavian, | ||
| 136 | Siddham, | ||
| 137 | SignWriting, | ||
| 138 | Sinhala, | ||
| 139 | Sogdian, | ||
| 140 | Sora_Sompeng, | ||
| 141 | Soyombo, | ||
| 142 | Sundanese, | ||
| 143 | Syloti_Nagri, | ||
| 144 | Syriac, | ||
| 145 | Tagalog, | ||
| 146 | Tagbanwa, | ||
| 147 | Tai_Le, | ||
| 148 | Tai_Tham, | ||
| 149 | Tai_Viet, | ||
| 150 | Takri, | ||
| 151 | Tamil, | ||
| 152 | Tangsa, | ||
| 153 | Tangut, | ||
| 154 | Telugu, | ||
| 155 | Thaana, | ||
| 156 | Thai, | ||
| 157 | Tibetan, | ||
| 158 | Tifinagh, | ||
| 159 | Tirhuta, | ||
| 160 | Toto, | ||
| 161 | Ugaritic, | ||
| 162 | Vai, | ||
| 163 | Vithkuqi, | ||
| 164 | Wancho, | ||
| 165 | Warang_Citi, | ||
| 166 | Yezidi, | ||
| 167 | Yi, | ||
| 168 | Zanabazar_Square, | ||
| 169 | }; | ||
| 170 | |||
| 171 | const block_size = 256; | ||
| 172 | const Block = [block_size]u8; | ||
| 173 | |||
| 174 | const BlockMap = std.HashMap( | ||
| 175 | Block, | ||
| 176 | u16, | ||
| 177 | struct { | ||
| 178 | pub fn hash(_: @This(), k: Block) u64 { | ||
| 179 | var hasher = std.hash.Wyhash.init(0); | ||
| 180 | std.hash.autoHashStrat(&hasher, k, .DeepRecursive); | ||
| 181 | return hasher.final(); | ||
| 182 | } | ||
| 183 | |||
| 184 | pub fn eql(_: @This(), a: Block, b: Block) bool { | ||
| 185 | return std.mem.eql(u8, &a, &b); | ||
| 186 | } | ||
| 187 | }, | ||
| 188 | std.hash_map.default_max_load_percentage, | ||
| 189 | ); | ||
| 190 | |||
| 191 | pub fn main() !void { | ||
| 192 | var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); | ||
| 193 | defer arena.deinit(); | ||
| 194 | const allocator = arena.allocator(); | ||
| 195 | |||
| 196 | var flat_map = std.AutoHashMap(u21, u8).init(allocator); | ||
| 197 | defer flat_map.deinit(); | ||
| 198 | |||
| 199 | var line_buf: [4096]u8 = undefined; | ||
| 200 | |||
| 201 | // Process Scripts.txt | ||
| 202 | var in_file = try std.fs.cwd().openFile("data/unicode/Scripts.txt", .{}); | ||
| 203 | defer in_file.close(); | ||
| 204 | var in_buf = std.io.bufferedReader(in_file.reader()); | ||
| 205 | const in_reader = in_buf.reader(); | ||
| 206 | |||
| 207 | while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { | ||
| 208 | if (line.len == 0 or line[0] == '#') continue; | ||
| 209 | |||
| 210 | const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; | ||
| 211 | |||
| 212 | var field_iter = std.mem.tokenizeAny(u8, no_comment, "; "); | ||
| 213 | var current_code: [2]u21 = undefined; | ||
| 214 | |||
| 215 | var i: usize = 0; | ||
| 216 | while (field_iter.next()) |field| : (i += 1) { | ||
| 217 | switch (i) { | ||
| 218 | 0 => { | ||
| 219 | // Code point(s) | ||
| 220 | if (std.mem.indexOf(u8, field, "..")) |dots| { | ||
| 221 | current_code = .{ | ||
| 222 | try std.fmt.parseInt(u21, field[0..dots], 16), | ||
| 223 | try std.fmt.parseInt(u21, field[dots + 2 ..], 16), | ||
| 224 | }; | ||
| 225 | } else { | ||
| 226 | const code = try std.fmt.parseInt(u21, field, 16); | ||
| 227 | current_code = .{ code, code }; | ||
| 228 | } | ||
| 229 | }, | ||
| 230 | 1 => { | ||
| 231 | // Script | ||
| 232 | const script = std.meta.stringToEnum(Script, field) orelse { | ||
| 233 | std.debug.print("Unknown script: {s}\n", .{field}); | ||
| 234 | return error.UnknownScript; | ||
| 235 | }; | ||
| 236 | for (current_code[0]..current_code[1] + 1) |cp| try flat_map.put(@intCast(cp), @intFromEnum(script)); | ||
| 237 | }, | ||
| 238 | else => {}, | ||
| 239 | } | ||
| 240 | } | ||
| 241 | } | ||
| 242 | |||
| 243 | var blocks_map = BlockMap.init(allocator); | ||
| 244 | defer blocks_map.deinit(); | ||
| 245 | |||
| 246 | var stage1 = std.ArrayList(u16).init(allocator); | ||
| 247 | defer stage1.deinit(); | ||
| 248 | |||
| 249 | var stage2 = std.ArrayList(u8).init(allocator); | ||
| 250 | defer stage2.deinit(); | ||
| 251 | |||
| 252 | var stage3 = std.ArrayList(u8).init(allocator); | ||
| 253 | defer stage3.deinit(); | ||
| 254 | |||
| 255 | var block: Block = [_]u8{0} ** block_size; | ||
| 256 | var block_len: u16 = 0; | ||
| 257 | |||
| 258 | for (0..0x110000) |i| { | ||
| 259 | const cp: u21 = @intCast(i); | ||
| 260 | const script = flat_map.get(cp) orelse 0; | ||
| 261 | |||
| 262 | const stage3_idx = blk: { | ||
| 263 | for (stage3.items, 0..) |script_i, j| { | ||
| 264 | if (script == script_i) break :blk j; | ||
| 265 | } | ||
| 266 | try stage3.append(script); | ||
| 267 | break :blk stage3.items.len - 1; | ||
| 268 | }; | ||
| 269 | |||
| 270 | // Process block | ||
| 271 | block[block_len] = @intCast(stage3_idx); | ||
| 272 | block_len += 1; | ||
| 273 | |||
| 274 | if (block_len < block_size and cp != 0x10ffff) continue; | ||
| 275 | |||
| 276 | const gop = try blocks_map.getOrPut(block); | ||
| 277 | if (!gop.found_existing) { | ||
| 278 | gop.value_ptr.* = @intCast(stage2.items.len); | ||
| 279 | try stage2.appendSlice(&block); | ||
| 280 | } | ||
| 281 | |||
| 282 | try stage1.append(gop.value_ptr.*); | ||
| 283 | block_len = 0; | ||
| 284 | } | ||
| 285 | |||
| 286 | var args_iter = try std.process.argsWithAllocator(allocator); | ||
| 287 | defer args_iter.deinit(); | ||
| 288 | _ = args_iter.skip(); | ||
| 289 | const output_path = args_iter.next() orelse @panic("No output file arg!"); | ||
| 290 | |||
| 291 | const compressor = std.compress.deflate.compressor; | ||
| 292 | var out_file = try std.fs.cwd().createFile(output_path, .{}); | ||
| 293 | defer out_file.close(); | ||
| 294 | var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression }); | ||
| 295 | defer out_comp.deinit(); | ||
| 296 | const writer = out_comp.writer(); | ||
| 297 | |||
| 298 | const endian = builtin.cpu.arch.endian(); | ||
| 299 | try writer.writeInt(u16, @intCast(stage1.items.len), endian); | ||
| 300 | for (stage1.items) |i| try writer.writeInt(u16, i, endian); | ||
| 301 | |||
| 302 | try writer.writeInt(u16, @intCast(stage2.items.len), endian); | ||
| 303 | for (stage2.items) |i| try writer.writeInt(u8, i, endian); | ||
| 304 | |||
| 305 | try writer.writeInt(u8, @intCast(stage3.items.len), endian); | ||
| 306 | for (stage3.items) |i| try writer.writeInt(u8, i, endian); | ||
| 307 | |||
| 308 | try out_comp.flush(); | ||
| 309 | } | ||
diff --git a/src/CanonData.zig b/src/CanonData.zig index 36895ff..9f1deb8 100644 --- a/src/CanonData.zig +++ b/src/CanonData.zig | |||
| @@ -42,7 +42,7 @@ pub fn init(allocator: mem.Allocator) !Self { | |||
| 42 | return self; | 42 | return self; |
| 43 | } | 43 | } |
| 44 | 44 | ||
| 45 | pub fn deinit(self: *Self) void { | 45 | pub fn deinit(self: *const Self) void { |
| 46 | self.nfc.deinit(); | 46 | self.nfc.deinit(); |
| 47 | for (self.nfd) |slice| self.allocator.free(slice); | 47 | for (self.nfd) |slice| self.allocator.free(slice); |
| 48 | self.allocator.free(self.nfd); | 48 | self.allocator.free(self.nfd); |
diff --git a/src/CaseData.zig b/src/CaseData.zig index 4f06636..c9ccc1e 100644 --- a/src/CaseData.zig +++ b/src/CaseData.zig | |||
| @@ -77,7 +77,7 @@ pub fn init(allocator: mem.Allocator) !Self { | |||
| 77 | return self; | 77 | return self; |
| 78 | } | 78 | } |
| 79 | 79 | ||
| 80 | pub fn deinit(self: *Self) void { | 80 | pub fn deinit(self: *const Self) void { |
| 81 | self.allocator.free(self.case_map); | 81 | self.allocator.free(self.case_map); |
| 82 | self.allocator.free(self.prop_s1); | 82 | self.allocator.free(self.prop_s1); |
| 83 | self.allocator.free(self.prop_s2); | 83 | self.allocator.free(self.prop_s2); |
| @@ -103,7 +103,7 @@ pub fn isUpperStr(self: Self, str: []const u8) bool { | |||
| 103 | } | 103 | } |
| 104 | 104 | ||
| 105 | test "isUpperStr" { | 105 | test "isUpperStr" { |
| 106 | var cd = try init(testing.allocator); | 106 | const cd = try init(testing.allocator); |
| 107 | defer cd.deinit(); | 107 | defer cd.deinit(); |
| 108 | 108 | ||
| 109 | try testing.expect(cd.isUpperStr("HELLO, WORLD 2112!")); | 109 | try testing.expect(cd.isUpperStr("HELLO, WORLD 2112!")); |
| @@ -138,7 +138,7 @@ pub fn toUpperStr( | |||
| 138 | } | 138 | } |
| 139 | 139 | ||
| 140 | test "toUpperStr" { | 140 | test "toUpperStr" { |
| 141 | var cd = try init(testing.allocator); | 141 | const cd = try init(testing.allocator); |
| 142 | defer cd.deinit(); | 142 | defer cd.deinit(); |
| 143 | 143 | ||
| 144 | const uppered = try cd.toUpperStr(testing.allocator, "Hello, World 2112!"); | 144 | const uppered = try cd.toUpperStr(testing.allocator, "Hello, World 2112!"); |
| @@ -161,7 +161,7 @@ pub fn isLowerStr(self: Self, str: []const u8) bool { | |||
| 161 | } | 161 | } |
| 162 | 162 | ||
| 163 | test "isLowerStr" { | 163 | test "isLowerStr" { |
| 164 | var cd = try init(testing.allocator); | 164 | const cd = try init(testing.allocator); |
| 165 | defer cd.deinit(); | 165 | defer cd.deinit(); |
| 166 | 166 | ||
| 167 | try testing.expect(cd.isLowerStr("hello, world 2112!")); | 167 | try testing.expect(cd.isLowerStr("hello, world 2112!")); |
| @@ -196,7 +196,7 @@ pub fn toLowerStr( | |||
| 196 | } | 196 | } |
| 197 | 197 | ||
| 198 | test "toLowerStr" { | 198 | test "toLowerStr" { |
| 199 | var cd = try init(testing.allocator); | 199 | const cd = try init(testing.allocator); |
| 200 | defer cd.deinit(); | 200 | defer cd.deinit(); |
| 201 | 201 | ||
| 202 | const lowered = try cd.toLowerStr(testing.allocator, "Hello, World 2112!"); | 202 | const lowered = try cd.toLowerStr(testing.allocator, "Hello, World 2112!"); |
diff --git a/src/CaseFold.zig b/src/CaseFold.zig index e00d03b..9b10e16 100644 --- a/src/CaseFold.zig +++ b/src/CaseFold.zig | |||
| @@ -90,11 +90,11 @@ pub fn compatCaselessMatch( | |||
| 90 | test "compatCaselessMatch" { | 90 | test "compatCaselessMatch" { |
| 91 | const allocator = testing.allocator; | 91 | const allocator = testing.allocator; |
| 92 | 92 | ||
| 93 | var norm_data = try Normalize.NormData.init(allocator); | 93 | const norm_data = try Normalize.NormData.init(allocator); |
| 94 | defer norm_data.deinit(); | 94 | defer norm_data.deinit(); |
| 95 | const n = Normalize{ .norm_data = &norm_data }; | 95 | const n = Normalize{ .norm_data = &norm_data }; |
| 96 | 96 | ||
| 97 | var fold_data = try FoldData.init(allocator); | 97 | const fold_data = try FoldData.init(allocator); |
| 98 | defer fold_data.deinit(); | 98 | defer fold_data.deinit(); |
| 99 | const caser = Self{ .fold_data = &fold_data }; | 99 | const caser = Self{ .fold_data = &fold_data }; |
| 100 | 100 | ||
| @@ -163,11 +163,11 @@ pub fn canonCaselessMatch( | |||
| 163 | test "canonCaselessMatch" { | 163 | test "canonCaselessMatch" { |
| 164 | const allocator = testing.allocator; | 164 | const allocator = testing.allocator; |
| 165 | 165 | ||
| 166 | var norm_data = try Normalize.NormData.init(allocator); | 166 | const norm_data = try Normalize.NormData.init(allocator); |
| 167 | defer norm_data.deinit(); | 167 | defer norm_data.deinit(); |
| 168 | const n = Normalize{ .norm_data = &norm_data }; | 168 | const n = Normalize{ .norm_data = &norm_data }; |
| 169 | 169 | ||
| 170 | var fold_data = try FoldData.init(allocator); | 170 | const fold_data = try FoldData.init(allocator); |
| 171 | defer fold_data.deinit(); | 171 | defer fold_data.deinit(); |
| 172 | const caser = Self{ .fold_data = &fold_data }; | 172 | const caser = Self{ .fold_data = &fold_data }; |
| 173 | 173 | ||
diff --git a/src/CombiningData.zig b/src/CombiningData.zig index 95c947d..c67638c 100644 --- a/src/CombiningData.zig +++ b/src/CombiningData.zig | |||
| @@ -32,7 +32,7 @@ pub fn init(allocator: mem.Allocator) !Self { | |||
| 32 | return self; | 32 | return self; |
| 33 | } | 33 | } |
| 34 | 34 | ||
| 35 | pub fn deinit(self: *Self) void { | 35 | pub fn deinit(self: *const Self) void { |
| 36 | self.allocator.free(self.s1); | 36 | self.allocator.free(self.s1); |
| 37 | self.allocator.free(self.s2); | 37 | self.allocator.free(self.s2); |
| 38 | } | 38 | } |
diff --git a/src/CompatData.zig b/src/CompatData.zig index fd7f678..67c43e6 100644 --- a/src/CompatData.zig +++ b/src/CompatData.zig | |||
| @@ -37,7 +37,7 @@ pub fn init(allocator: mem.Allocator) !Self { | |||
| 37 | return self; | 37 | return self; |
| 38 | } | 38 | } |
| 39 | 39 | ||
| 40 | pub fn deinit(self: *Self) void { | 40 | pub fn deinit(self: *const Self) void { |
| 41 | for (self.nfkd) |slice| { | 41 | for (self.nfkd) |slice| { |
| 42 | if (slice.len != 0) self.allocator.free(slice); | 42 | if (slice.len != 0) self.allocator.free(slice); |
| 43 | } | 43 | } |
diff --git a/src/DisplayWidth.zig b/src/DisplayWidth.zig index 8d5eb0f..e547adf 100644 --- a/src/DisplayWidth.zig +++ b/src/DisplayWidth.zig | |||
| @@ -56,7 +56,7 @@ pub fn strWidth(self: Self, str: []const u8) usize { | |||
| 56 | } | 56 | } |
| 57 | 57 | ||
| 58 | test "strWidth" { | 58 | test "strWidth" { |
| 59 | var data = try DisplayWidthData.init(testing.allocator); | 59 | const data = try DisplayWidthData.init(testing.allocator); |
| 60 | defer data.deinit(); | 60 | defer data.deinit(); |
| 61 | const self = Self{ .data = &data }; | 61 | const self = Self{ .data = &data }; |
| 62 | 62 | ||
| @@ -157,7 +157,7 @@ pub fn center( | |||
| 157 | 157 | ||
| 158 | test "center" { | 158 | test "center" { |
| 159 | const allocator = testing.allocator; | 159 | const allocator = testing.allocator; |
| 160 | var data = try DisplayWidthData.init(allocator); | 160 | const data = try DisplayWidthData.init(allocator); |
| 161 | defer data.deinit(); | 161 | defer data.deinit(); |
| 162 | const self = Self{ .data = &data }; | 162 | const self = Self{ .data = &data }; |
| 163 | 163 | ||
| @@ -236,7 +236,7 @@ pub fn padLeft( | |||
| 236 | 236 | ||
| 237 | test "padLeft" { | 237 | test "padLeft" { |
| 238 | const allocator = testing.allocator; | 238 | const allocator = testing.allocator; |
| 239 | var data = try DisplayWidthData.init(allocator); | 239 | const data = try DisplayWidthData.init(allocator); |
| 240 | defer data.deinit(); | 240 | defer data.deinit(); |
| 241 | const self = Self{ .data = &data }; | 241 | const self = Self{ .data = &data }; |
| 242 | 242 | ||
| @@ -286,7 +286,7 @@ pub fn padRight( | |||
| 286 | 286 | ||
| 287 | test "padRight" { | 287 | test "padRight" { |
| 288 | const allocator = testing.allocator; | 288 | const allocator = testing.allocator; |
| 289 | var data = try DisplayWidthData.init(allocator); | 289 | const data = try DisplayWidthData.init(allocator); |
| 290 | defer data.deinit(); | 290 | defer data.deinit(); |
| 291 | const self = Self{ .data = &data }; | 291 | const self = Self{ .data = &data }; |
| 292 | 292 | ||
| @@ -339,7 +339,7 @@ pub fn wrap( | |||
| 339 | 339 | ||
| 340 | test "wrap" { | 340 | test "wrap" { |
| 341 | const allocator = testing.allocator; | 341 | const allocator = testing.allocator; |
| 342 | var data = try DisplayWidthData.init(allocator); | 342 | const data = try DisplayWidthData.init(allocator); |
| 343 | defer data.deinit(); | 343 | defer data.deinit(); |
| 344 | const self = Self{ .data = &data }; | 344 | const self = Self{ .data = &data }; |
| 345 | 345 | ||
diff --git a/src/FoldData.zig b/src/FoldData.zig index 2a9a1f5..e387447 100644 --- a/src/FoldData.zig +++ b/src/FoldData.zig | |||
| @@ -41,7 +41,7 @@ pub fn init(allocator: mem.Allocator) !Self { | |||
| 41 | return self; | 41 | return self; |
| 42 | } | 42 | } |
| 43 | 43 | ||
| 44 | pub fn deinit(self: *Self) void { | 44 | pub fn deinit(self: *const Self) void { |
| 45 | for (self.fold) |slice| self.allocator.free(slice); | 45 | for (self.fold) |slice| self.allocator.free(slice); |
| 46 | self.allocator.free(self.fold); | 46 | self.allocator.free(self.fold); |
| 47 | self.allocator.free(self.cwcf); | 47 | self.allocator.free(self.cwcf); |
diff --git a/src/GenCatData.zig b/src/GenCatData.zig index b45135b..37ae037 100644 --- a/src/GenCatData.zig +++ b/src/GenCatData.zig | |||
| @@ -71,7 +71,7 @@ pub fn init(allocator: mem.Allocator) !Self { | |||
| 71 | return self; | 71 | return self; |
| 72 | } | 72 | } |
| 73 | 73 | ||
| 74 | pub fn deinit(self: *Self) void { | 74 | pub fn deinit(self: *const Self) void { |
| 75 | self.allocator.free(self.s1); | 75 | self.allocator.free(self.s1); |
| 76 | self.allocator.free(self.s2); | 76 | self.allocator.free(self.s2); |
| 77 | self.allocator.free(self.s3); | 77 | self.allocator.free(self.s3); |
diff --git a/src/GraphemeData.zig b/src/GraphemeData.zig index e418dea..971929a 100644 --- a/src/GraphemeData.zig +++ b/src/GraphemeData.zig | |||
| @@ -64,7 +64,7 @@ pub fn init(allocator: mem.Allocator) !Self { | |||
| 64 | return self; | 64 | return self; |
| 65 | } | 65 | } |
| 66 | 66 | ||
| 67 | pub fn deinit(self: *Self) void { | 67 | pub fn deinit(self: *const Self) void { |
| 68 | self.allocator.free(self.s1); | 68 | self.allocator.free(self.s1); |
| 69 | self.allocator.free(self.s2); | 69 | self.allocator.free(self.s2); |
| 70 | self.allocator.free(self.s3); | 70 | self.allocator.free(self.s3); |
diff --git a/src/HangulData.zig b/src/HangulData.zig index b97424c..ec360e9 100644 --- a/src/HangulData.zig +++ b/src/HangulData.zig | |||
| @@ -41,7 +41,7 @@ pub fn init(allocator: mem.Allocator) !Self { | |||
| 41 | return self; | 41 | return self; |
| 42 | } | 42 | } |
| 43 | 43 | ||
| 44 | pub fn deinit(self: *Self) void { | 44 | pub fn deinit(self: *const Self) void { |
| 45 | self.allocator.free(self.s1); | 45 | self.allocator.free(self.s1); |
| 46 | self.allocator.free(self.s2); | 46 | self.allocator.free(self.s2); |
| 47 | } | 47 | } |
diff --git a/src/NormData.zig b/src/NormData.zig index 8a7fa49..413619a 100644 --- a/src/NormData.zig +++ b/src/NormData.zig | |||
| @@ -26,7 +26,7 @@ pub fn init(allocator: std.mem.Allocator) !Self { | |||
| 26 | }; | 26 | }; |
| 27 | } | 27 | } |
| 28 | 28 | ||
| 29 | pub fn deinit(self: *Self) void { | 29 | pub fn deinit(self: *const Self) void { |
| 30 | self.canon_data.deinit(); | 30 | self.canon_data.deinit(); |
| 31 | self.ccc_data.deinit(); | 31 | self.ccc_data.deinit(); |
| 32 | self.compat_data.deinit(); | 32 | self.compat_data.deinit(); |
diff --git a/src/NormPropsData.zig b/src/NormPropsData.zig index 3c49712..893a8d0 100644 --- a/src/NormPropsData.zig +++ b/src/NormPropsData.zig | |||
| @@ -32,7 +32,7 @@ pub fn init(allocator: mem.Allocator) !Self { | |||
| 32 | return self; | 32 | return self; |
| 33 | } | 33 | } |
| 34 | 34 | ||
| 35 | pub fn deinit(self: *Self) void { | 35 | pub fn deinit(self: *const Self) void { |
| 36 | self.allocator.free(self.s1); | 36 | self.allocator.free(self.s1); |
| 37 | self.allocator.free(self.s2); | 37 | self.allocator.free(self.s2); |
| 38 | } | 38 | } |
diff --git a/src/Normalize.zig b/src/Normalize.zig index 6ef7c90..daf774d 100644 --- a/src/Normalize.zig +++ b/src/Normalize.zig | |||
| @@ -177,7 +177,7 @@ fn decompose( | |||
| 177 | 177 | ||
| 178 | test "decompose" { | 178 | test "decompose" { |
| 179 | const allocator = testing.allocator; | 179 | const allocator = testing.allocator; |
| 180 | var data = try NormData.init(allocator); | 180 | const data = try NormData.init(allocator); |
| 181 | defer data.deinit(); | 181 | defer data.deinit(); |
| 182 | var n = Self{ .norm_data = &data }; | 182 | var n = Self{ .norm_data = &data }; |
| 183 | 183 | ||
| @@ -225,7 +225,7 @@ pub const Result = struct { | |||
| 225 | allocator: ?mem.Allocator = null, | 225 | allocator: ?mem.Allocator = null, |
| 226 | slice: []const u8, | 226 | slice: []const u8, |
| 227 | 227 | ||
| 228 | pub fn deinit(self: *Result) void { | 228 | pub fn deinit(self: *const Result) void { |
| 229 | if (self.allocator) |allocator| allocator.free(self.slice); | 229 | if (self.allocator) |allocator| allocator.free(self.slice); |
| 230 | } | 230 | } |
| 231 | }; | 231 | }; |
| @@ -297,11 +297,11 @@ fn nfxd(self: Self, allocator: mem.Allocator, str: []const u8, form: Form) !Resu | |||
| 297 | 297 | ||
| 298 | test "nfd ASCII / no-alloc" { | 298 | test "nfd ASCII / no-alloc" { |
| 299 | const allocator = testing.allocator; | 299 | const allocator = testing.allocator; |
| 300 | var data = try NormData.init(allocator); | 300 | const data = try NormData.init(allocator); |
| 301 | defer data.deinit(); | 301 | defer data.deinit(); |
| 302 | var n = Self{ .norm_data = &data }; | 302 | const n = Self{ .norm_data = &data }; |
| 303 | 303 | ||
| 304 | var result = try n.nfd(allocator, "Hello World!"); | 304 | const result = try n.nfd(allocator, "Hello World!"); |
| 305 | defer result.deinit(); | 305 | defer result.deinit(); |
| 306 | 306 | ||
| 307 | try testing.expectEqualStrings("Hello World!", result.slice); | 307 | try testing.expectEqualStrings("Hello World!", result.slice); |
| @@ -309,11 +309,11 @@ test "nfd ASCII / no-alloc" { | |||
| 309 | 309 | ||
| 310 | test "nfd !ASCII / alloc" { | 310 | test "nfd !ASCII / alloc" { |
| 311 | const allocator = testing.allocator; | 311 | const allocator = testing.allocator; |
| 312 | var data = try NormData.init(allocator); | 312 | const data = try NormData.init(allocator); |
| 313 | defer data.deinit(); | 313 | defer data.deinit(); |
| 314 | var n = Self{ .norm_data = &data }; | 314 | const n = Self{ .norm_data = &data }; |
| 315 | 315 | ||
| 316 | var result = try n.nfd(allocator, "Héllo World! \u{3d3}"); | 316 | const result = try n.nfd(allocator, "Héllo World! \u{3d3}"); |
| 317 | defer result.deinit(); | 317 | defer result.deinit(); |
| 318 | 318 | ||
| 319 | try testing.expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", result.slice); | 319 | try testing.expectEqualStrings("He\u{301}llo World! \u{3d2}\u{301}", result.slice); |
| @@ -321,11 +321,11 @@ test "nfd !ASCII / alloc" { | |||
| 321 | 321 | ||
| 322 | test "nfkd ASCII / no-alloc" { | 322 | test "nfkd ASCII / no-alloc" { |
| 323 | const allocator = testing.allocator; | 323 | const allocator = testing.allocator; |
| 324 | var data = try NormData.init(allocator); | 324 | const data = try NormData.init(allocator); |
| 325 | defer data.deinit(); | 325 | defer data.deinit(); |
| 326 | var n = Self{ .norm_data = &data }; | 326 | const n = Self{ .norm_data = &data }; |
| 327 | 327 | ||
| 328 | var result = try n.nfkd(allocator, "Hello World!"); | 328 | const result = try n.nfkd(allocator, "Hello World!"); |
| 329 | defer result.deinit(); | 329 | defer result.deinit(); |
| 330 | 330 | ||
| 331 | try testing.expectEqualStrings("Hello World!", result.slice); | 331 | try testing.expectEqualStrings("Hello World!", result.slice); |
| @@ -333,11 +333,11 @@ test "nfkd ASCII / no-alloc" { | |||
| 333 | 333 | ||
| 334 | test "nfkd !ASCII / alloc" { | 334 | test "nfkd !ASCII / alloc" { |
| 335 | const allocator = testing.allocator; | 335 | const allocator = testing.allocator; |
| 336 | var data = try NormData.init(allocator); | 336 | const data = try NormData.init(allocator); |
| 337 | defer data.deinit(); | 337 | defer data.deinit(); |
| 338 | var n = Self{ .norm_data = &data }; | 338 | const n = Self{ .norm_data = &data }; |
| 339 | 339 | ||
| 340 | var result = try n.nfkd(allocator, "Héllo World! \u{3d3}"); | 340 | const result = try n.nfkd(allocator, "Héllo World! \u{3d3}"); |
| 341 | defer result.deinit(); | 341 | defer result.deinit(); |
| 342 | 342 | ||
| 343 | try testing.expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", result.slice); | 343 | try testing.expectEqualStrings("He\u{301}llo World! \u{3a5}\u{301}", result.slice); |
| @@ -532,11 +532,11 @@ fn nfxc(self: Self, allocator: mem.Allocator, str: []const u8, form: Form) !Resu | |||
| 532 | 532 | ||
| 533 | test "nfc" { | 533 | test "nfc" { |
| 534 | const allocator = testing.allocator; | 534 | const allocator = testing.allocator; |
| 535 | var data = try NormData.init(allocator); | 535 | const data = try NormData.init(allocator); |
| 536 | defer data.deinit(); | 536 | defer data.deinit(); |
| 537 | var n = Self{ .norm_data = &data }; | 537 | const n = Self{ .norm_data = &data }; |
| 538 | 538 | ||
| 539 | var result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}"); | 539 | const result = try n.nfc(allocator, "Complex char: \u{3D2}\u{301}"); |
| 540 | defer result.deinit(); | 540 | defer result.deinit(); |
| 541 | 541 | ||
| 542 | try testing.expectEqualStrings("Complex char: \u{3D3}", result.slice); | 542 | try testing.expectEqualStrings("Complex char: \u{3D3}", result.slice); |
| @@ -544,11 +544,11 @@ test "nfc" { | |||
| 544 | 544 | ||
| 545 | test "nfkc" { | 545 | test "nfkc" { |
| 546 | const allocator = testing.allocator; | 546 | const allocator = testing.allocator; |
| 547 | var data = try NormData.init(allocator); | 547 | const data = try NormData.init(allocator); |
| 548 | defer data.deinit(); | 548 | defer data.deinit(); |
| 549 | var n = Self{ .norm_data = &data }; | 549 | const n = Self{ .norm_data = &data }; |
| 550 | 550 | ||
| 551 | var result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}"); | 551 | const result = try n.nfkc(allocator, "Complex char: \u{03A5}\u{0301}"); |
| 552 | defer result.deinit(); | 552 | defer result.deinit(); |
| 553 | 553 | ||
| 554 | try testing.expectEqualStrings("Complex char: \u{038E}", result.slice); | 554 | try testing.expectEqualStrings("Complex char: \u{038E}", result.slice); |
| @@ -556,9 +556,9 @@ test "nfkc" { | |||
| 556 | 556 | ||
| 557 | /// Tests for equality of `a` and `b` after normalizing to NFC. | 557 | /// Tests for equality of `a` and `b` after normalizing to NFC. |
| 558 | pub fn eql(self: Self, allocator: mem.Allocator, a: []const u8, b: []const u8) !bool { | 558 | pub fn eql(self: Self, allocator: mem.Allocator, a: []const u8, b: []const u8) !bool { |
| 559 | var norm_result_a = try self.nfc(allocator, a); | 559 | const norm_result_a = try self.nfc(allocator, a); |
| 560 | defer norm_result_a.deinit(); | 560 | defer norm_result_a.deinit(); |
| 561 | var norm_result_b = try self.nfc(allocator, b); | 561 | const norm_result_b = try self.nfc(allocator, b); |
| 562 | defer norm_result_b.deinit(); | 562 | defer norm_result_b.deinit(); |
| 563 | 563 | ||
| 564 | return mem.eql(u8, norm_result_a.slice, norm_result_b.slice); | 564 | return mem.eql(u8, norm_result_a.slice, norm_result_b.slice); |
| @@ -566,9 +566,9 @@ pub fn eql(self: Self, allocator: mem.Allocator, a: []const u8, b: []const u8) ! | |||
| 566 | 566 | ||
| 567 | test "eql" { | 567 | test "eql" { |
| 568 | const allocator = testing.allocator; | 568 | const allocator = testing.allocator; |
| 569 | var data = try NormData.init(allocator); | 569 | const data = try NormData.init(allocator); |
| 570 | defer data.deinit(); | 570 | defer data.deinit(); |
| 571 | var n = Self{ .norm_data = &data }; | 571 | const n = Self{ .norm_data = &data }; |
| 572 | 572 | ||
| 573 | try testing.expect(try n.eql(allocator, "foé", "foe\u{0301}")); | 573 | try testing.expect(try n.eql(allocator, "foé", "foe\u{0301}")); |
| 574 | try testing.expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}")); | 574 | try testing.expect(try n.eql(allocator, "foϓ", "fo\u{03D2}\u{0301}")); |
| @@ -601,9 +601,9 @@ fn isFcd(self: Self, str: []const u8) bool { | |||
| 601 | 601 | ||
| 602 | test "isFcd" { | 602 | test "isFcd" { |
| 603 | const allocator = testing.allocator; | 603 | const allocator = testing.allocator; |
| 604 | var data = try NormData.init(allocator); | 604 | const data = try NormData.init(allocator); |
| 605 | defer data.deinit(); | 605 | defer data.deinit(); |
| 606 | var n = Self{ .norm_data = &data }; | 606 | const n = Self{ .norm_data = &data }; |
| 607 | 607 | ||
| 608 | const is_nfc = "José \u{3D3}"; | 608 | const is_nfc = "José \u{3D3}"; |
| 609 | try testing.expect(n.isFcd(is_nfc)); | 609 | try testing.expect(n.isFcd(is_nfc)); |
| @@ -620,9 +620,9 @@ test "Unicode normalization tests" { | |||
| 620 | defer arena.deinit(); | 620 | defer arena.deinit(); |
| 621 | var allocator = arena.allocator(); | 621 | var allocator = arena.allocator(); |
| 622 | 622 | ||
| 623 | var data = try NormData.init(allocator); | 623 | const data = try NormData.init(allocator); |
| 624 | defer data.deinit(); | 624 | defer data.deinit(); |
| 625 | var n = Self{ .norm_data = &data }; | 625 | const n = Self{ .norm_data = &data }; |
| 626 | 626 | ||
| 627 | var file = try fs.cwd().openFile("data/unicode/NormalizationTest.txt", .{}); | 627 | var file = try fs.cwd().openFile("data/unicode/NormalizationTest.txt", .{}); |
| 628 | defer file.close(); | 628 | defer file.close(); |
| @@ -721,7 +721,7 @@ test "Unicode normalization tests" { | |||
| 721 | } | 721 | } |
| 722 | 722 | ||
| 723 | const want = w_buf.items; | 723 | const want = w_buf.items; |
| 724 | var got = try n.nfkd(allocator, input); | 724 | const got = try n.nfkd(allocator, input); |
| 725 | defer got.deinit(); | 725 | defer got.deinit(); |
| 726 | 726 | ||
| 727 | try testing.expectEqualStrings(want, got.slice); | 727 | try testing.expectEqualStrings(want, got.slice); |
diff --git a/src/NumericData.zig b/src/NumericData.zig index baf8f11..210d623 100644 --- a/src/NumericData.zig +++ b/src/NumericData.zig | |||
| @@ -33,7 +33,7 @@ pub fn init(allocator: mem.Allocator) !Self { | |||
| 33 | return self; | 33 | return self; |
| 34 | } | 34 | } |
| 35 | 35 | ||
| 36 | pub fn deinit(self: *Self) void { | 36 | pub fn deinit(self: *const Self) void { |
| 37 | self.allocator.free(self.s1); | 37 | self.allocator.free(self.s1); |
| 38 | self.allocator.free(self.s2); | 38 | self.allocator.free(self.s2); |
| 39 | } | 39 | } |
| @@ -59,7 +59,7 @@ pub inline fn isDecimal(self: Self, cp: u21) bool { | |||
| 59 | } | 59 | } |
| 60 | 60 | ||
| 61 | test "isDecimal" { | 61 | test "isDecimal" { |
| 62 | var self = try init(testing.allocator); | 62 | const self = try init(testing.allocator); |
| 63 | defer self.deinit(); | 63 | defer self.deinit(); |
| 64 | 64 | ||
| 65 | try testing.expect(self.isNumber('\u{277f}')); | 65 | try testing.expect(self.isNumber('\u{277f}')); |
diff --git a/src/ScriptsData.zig b/src/ScriptsData.zig new file mode 100644 index 0000000..ac1c46a --- /dev/null +++ b/src/ScriptsData.zig | |||
| @@ -0,0 +1,226 @@ | |||
| 1 | const std = @import("std"); | ||
| 2 | const builtin = @import("builtin"); | ||
| 3 | const compress = std.compress; | ||
| 4 | const mem = std.mem; | ||
| 5 | const testing = std.testing; | ||
| 6 | |||
/// Unicode script values returned by `script`.
///
/// The integer value of each tag is what the lookup table stores as a byte and
/// what `script` converts back with `@enumFromInt`, so the declaration order is
/// part of the data format: do not reorder or insert entries without
/// regenerating the table. The tag type is pinned to `u8` so that outgrowing a
/// single table byte becomes a compile error instead of a silent widening.
/// `none` (0) is the value `script` reports as `null`.
pub const Script = enum(u8) {
    none,
    Adlam,
    Ahom,
    Anatolian_Hieroglyphs,
    Arabic,
    Armenian,
    Avestan,
    Balinese,
    Bamum,
    Bassa_Vah,
    Batak,
    Bengali,
    Bhaiksuki,
    Bopomofo,
    Brahmi,
    Braille,
    Buginese,
    Buhid,
    Canadian_Aboriginal,
    Carian,
    Caucasian_Albanian,
    Chakma,
    Cham,
    Cherokee,
    Chorasmian,
    Common,
    Coptic,
    Cuneiform,
    Cypriot,
    Cypro_Minoan,
    Cyrillic,
    Deseret,
    Devanagari,
    Dives_Akuru,
    Dogra,
    Duployan,
    Egyptian_Hieroglyphs,
    Elbasan,
    Elymaic,
    Ethiopic,
    Georgian,
    Glagolitic,
    Gothic,
    Grantha,
    Greek,
    Gujarati,
    Gunjala_Gondi,
    Gurmukhi,
    Han,
    Hangul,
    Hanifi_Rohingya,
    Hanunoo,
    Hatran,
    Hebrew,
    Hiragana,
    Imperial_Aramaic,
    Inherited,
    Inscriptional_Pahlavi,
    Inscriptional_Parthian,
    Javanese,
    Kaithi,
    Kannada,
    Katakana,
    Kawi,
    Kayah_Li,
    Kharoshthi,
    Khitan_Small_Script,
    Khmer,
    Khojki,
    Khudawadi,
    Lao,
    Latin,
    Lepcha,
    Limbu,
    Linear_A,
    Linear_B,
    Lisu,
    Lycian,
    Lydian,
    Mahajani,
    Makasar,
    Malayalam,
    Mandaic,
    Manichaean,
    Marchen,
    Masaram_Gondi,
    Medefaidrin,
    Meetei_Mayek,
    Mende_Kikakui,
    Meroitic_Cursive,
    Meroitic_Hieroglyphs,
    Miao,
    Modi,
    Mongolian,
    Mro,
    Multani,
    Myanmar,
    Nabataean,
    Nag_Mundari,
    Nandinagari,
    New_Tai_Lue,
    Newa,
    Nko,
    Nushu,
    Nyiakeng_Puachue_Hmong,
    Ogham,
    Ol_Chiki,
    Old_Hungarian,
    Old_Italic,
    Old_North_Arabian,
    Old_Permic,
    Old_Persian,
    Old_Sogdian,
    Old_South_Arabian,
    Old_Turkic,
    Old_Uyghur,
    Oriya,
    Osage,
    Osmanya,
    Pahawh_Hmong,
    Palmyrene,
    Pau_Cin_Hau,
    Phags_Pa,
    Phoenician,
    Psalter_Pahlavi,
    Rejang,
    Runic,
    Samaritan,
    Saurashtra,
    Sharada,
    Shavian,
    Siddham,
    SignWriting,
    Sinhala,
    Sogdian,
    Sora_Sompeng,
    Soyombo,
    Sundanese,
    Syloti_Nagri,
    Syriac,
    Tagalog,
    Tagbanwa,
    Tai_Le,
    Tai_Tham,
    Tai_Viet,
    Takri,
    Tamil,
    Tangsa,
    Tangut,
    Telugu,
    Thaana,
    Thai,
    Tibetan,
    Tifinagh,
    Tirhuta,
    Toto,
    Ugaritic,
    Vai,
    Vithkuqi,
    Wancho,
    Warang_Citi,
    Yezidi,
    Yi,
    Zanabazar_Square,
};
| 174 | |||
/// Allocator that `init` uses to allocate the tables below and `deinit` uses to free them.
allocator: mem.Allocator,
/// Stage 1 table: `script` indexes it with `cp >> 8`; the entry is added to the
/// low byte of the code point to form an index into `s2`.
s1: []u16 = undefined,
/// Stage 2 table: each entry is an index into `s3`.
s2: []u8 = undefined,
/// Stage 3 table: each entry is a `Script` tag value; 0 is reported as `null` by `script`.
s3: []u8 = undefined,

const Self = @This();
| 181 | |||
/// Decompresses the embedded `scripts` data and loads its three lookup tables.
///
/// The returned value owns `s1`, `s2` and `s3`, all allocated with `allocator`;
/// release them with `deinit`. On any failure (allocation, decompression, or a
/// stream that ends early) the error is returned and every table allocated so
/// far is freed.
pub fn init(allocator: mem.Allocator) !Self {
    const decompressor = compress.deflate.decompressor;
    const in_bytes = @embedFile("scripts");
    var in_fbs = std.io.fixedBufferStream(in_bytes);
    var in_decomp = try decompressor(allocator, in_fbs.reader(), null);
    defer in_decomp.deinit();
    const reader = in_decomp.reader();

    // Multi-byte values are decoded with the endianness of the compilation target.
    // NOTE(review): presumably the generator writes with the same endianness — confirm for cross builds.
    const endian = builtin.cpu.arch.endian();

    var self = Self{ .allocator = allocator };

    // Stage 1: a u16 entry count followed by that many u16 entries.
    const s1_len: u16 = try reader.readInt(u16, endian);
    self.s1 = try allocator.alloc(u16, s1_len);
    errdefer allocator.free(self.s1);
    for (self.s1) |*entry| entry.* = try reader.readInt(u16, endian);

    // Stage 2: a u16 byte count followed by the raw bytes.
    const s2_len: u16 = try reader.readInt(u16, endian);
    self.s2 = try allocator.alloc(u8, s2_len);
    errdefer allocator.free(self.s2);
    // `readNoEof` fails with `error.EndOfStream` on a short read instead of
    // leaving the tail of the table undefined.
    try reader.readNoEof(self.s2);

    // Stage 3: the length prefix is a single byte here, unlike the u16 prefixes above.
    // NOTE(review): assumed to mirror what the generator writes — confirm.
    const s3_len: u16 = try reader.readInt(u8, endian);
    self.s3 = try allocator.alloc(u8, s3_len);
    errdefer allocator.free(self.s3);
    try reader.readNoEof(self.s3);

    return self;
}
| 208 | |||
/// Frees the three lookup tables with the allocator stored in `self`.
/// The tables must not be used after this call.
pub fn deinit(self: *const Self) void {
    const gpa = self.allocator;
    // Release in reverse order of allocation; the frees are independent.
    gpa.free(self.s3);
    gpa.free(self.s2);
    gpa.free(self.s1);
}
| 214 | |||
/// Lookup the Script type for `cp`.
///
/// Returns `null` when the tables map `cp` to 0, or when `cp >> 8` lies beyond
/// the end of the stage 1 table.
pub fn script(self: Self, cp: u21) ?Script {
    const block = cp >> 8;
    // A `u21` block index can reach 0x1FFF, which may exceed the loaded table;
    // report "no script" rather than indexing out of bounds.
    if (block >= self.s1.len) return null;

    const byte = self.s3[self.s2[self.s1[block] + (cp & 0xff)]];
    if (byte == 0) return null;
    return @enumFromInt(byte);
}
| 221 | |||
// Smoke test: load the embedded tables and check one well-known code point.
test "script" {
    const data = try init(testing.allocator);
    defer data.deinit();

    const latin_a = data.script('A').?;
    try testing.expectEqual(Script.Latin, latin_a);
}
diff --git a/src/WidthData.zig b/src/WidthData.zig index 32f8658..d17f0cd 100644 --- a/src/WidthData.zig +++ b/src/WidthData.zig | |||
| @@ -39,7 +39,7 @@ pub fn init(allocator: mem.Allocator) !Self { | |||
| 39 | return self; | 39 | return self; |
| 40 | } | 40 | } |
| 41 | 41 | ||
| 42 | pub fn deinit(self: *Self) void { | 42 | pub fn deinit(self: *const Self) void { |
| 43 | self.allocator.free(self.s1); | 43 | self.allocator.free(self.s1); |
| 44 | self.allocator.free(self.s2); | 44 | self.allocator.free(self.s2); |
| 45 | self.g_data.deinit(); | 45 | self.g_data.deinit(); |
diff --git a/src/grapheme.zig b/src/grapheme.zig index e55a6a4..ad43cfd 100644 --- a/src/grapheme.zig +++ b/src/grapheme.zig | |||
| @@ -237,7 +237,7 @@ test "Segmentation GraphemeIterator" { | |||
| 237 | var buf_reader = std.io.bufferedReader(file.reader()); | 237 | var buf_reader = std.io.bufferedReader(file.reader()); |
| 238 | var input_stream = buf_reader.reader(); | 238 | var input_stream = buf_reader.reader(); |
| 239 | 239 | ||
| 240 | var data = try GraphemeData.init(allocator); | 240 | const data = try GraphemeData.init(allocator); |
| 241 | defer data.deinit(); | 241 | defer data.deinit(); |
| 242 | 242 | ||
| 243 | var buf: [4096]u8 = undefined; | 243 | var buf: [4096]u8 = undefined; |
| @@ -302,7 +302,7 @@ test "Segmentation ZWJ and ZWSP emoji sequences" { | |||
| 302 | const with_zwsp = seq_1 ++ "\u{200B}" ++ seq_2; | 302 | const with_zwsp = seq_1 ++ "\u{200B}" ++ seq_2; |
| 303 | const no_joiner = seq_1 ++ seq_2; | 303 | const no_joiner = seq_1 ++ seq_2; |
| 304 | 304 | ||
| 305 | var data = try GraphemeData.init(std.testing.allocator); | 305 | const data = try GraphemeData.init(std.testing.allocator); |
| 306 | defer data.deinit(); | 306 | defer data.deinit(); |
| 307 | 307 | ||
| 308 | var iter = Iterator.init(with_zwj, &data); | 308 | var iter = Iterator.init(with_zwj, &data); |