diff options
| author | 2026-02-04 18:36:18 -0500 | |
|---|---|---|
| committer | 2026-02-04 18:36:18 -0500 | |
| commit | e476250ea9326b2550847b301c265115ff375a31 (patch) | |
| tree | cf627ced47cecce80020b7a1f30aa51852c0c59b | |
| parent | Normalization and case folding (diff) | |
| download | zg-e476250ea9326b2550847b301c265115ff375a31.tar.gz zg-e476250ea9326b2550847b301c265115ff375a31.tar.xz zg-e476250ea9326b2550847b301c265115ff375a31.zip | |
Rest of the 'easy' stuff
This gets us up to feature parity with Jacob's work. I want to
eliminate that last allocation using the comptime hash map, and then
see about eliminating allocations from case comparisons as well.
That should just about do it.
| -rw-r--r-- | build.zig | 57 | ||||
| -rw-r--r-- | codegen/case.zig | 145 | ||||
| -rw-r--r-- | codegen/core_props.zig | 30 | ||||
| -rw-r--r-- | codegen/gencat.zig | 37 | ||||
| -rw-r--r-- | codegen/numeric.zig | 24 | ||||
| -rw-r--r-- | codegen/props.zig | 23 | ||||
| -rw-r--r-- | codegen/scripts.zig | 31 | ||||
| -rw-r--r-- | src/GeneralCategories.zig | 102 | ||||
| -rw-r--r-- | src/LetterCasing.zig | 179 | ||||
| -rw-r--r-- | src/Properties.zig | 195 | ||||
| -rw-r--r-- | src/Scripts.zig | 82 |
11 files changed, 430 insertions, 475 deletions
| @@ -151,7 +151,7 @@ pub fn build(b: *std.Build) void { | |||
| 151 | }); | 151 | }); |
| 152 | gencat_gen_exe.root_module.addAnonymousImport("DerivedGeneralCategory.txt", .{ .root_source_file = b.path("data/unicode/extracted/DerivedGeneralCategory.txt") }); | 152 | gencat_gen_exe.root_module.addAnonymousImport("DerivedGeneralCategory.txt", .{ .root_source_file = b.path("data/unicode/extracted/DerivedGeneralCategory.txt") }); |
| 153 | const run_gencat_gen_exe = b.addRunArtifact(gencat_gen_exe); | 153 | const run_gencat_gen_exe = b.addRunArtifact(gencat_gen_exe); |
| 154 | const gencat_gen_out = run_gencat_gen_exe.addOutputFileArg("gencat.bin.z"); | 154 | const gencat_gen_out = run_gencat_gen_exe.addOutputFileArg("gencat.zig"); |
| 155 | 155 | ||
| 156 | const fold_gen_exe = b.addExecutable(.{ | 156 | const fold_gen_exe = b.addExecutable(.{ |
| 157 | .name = "fold", | 157 | .name = "fold", |
| @@ -177,47 +177,21 @@ pub fn build(b: *std.Build) void { | |||
| 177 | }); | 177 | }); |
| 178 | num_gen_exe.root_module.addAnonymousImport("DerivedNumericType.txt", .{ .root_source_file = b.path("data/unicode/extracted/DerivedNumericType.txt") }); | 178 | num_gen_exe.root_module.addAnonymousImport("DerivedNumericType.txt", .{ .root_source_file = b.path("data/unicode/extracted/DerivedNumericType.txt") }); |
| 179 | const run_num_gen_exe = b.addRunArtifact(num_gen_exe); | 179 | const run_num_gen_exe = b.addRunArtifact(num_gen_exe); |
| 180 | const num_gen_out = run_num_gen_exe.addOutputFileArg("numeric.bin.z"); | 180 | const num_gen_out = run_num_gen_exe.addOutputFileArg("numeric.zig"); |
| 181 | 181 | ||
| 182 | // Letter case properties | 182 | // Case mappings |
| 183 | const case_prop_gen_exe = b.addExecutable(.{ | 183 | const case_gen_exe = b.addExecutable(.{ |
| 184 | .name = "case_prop", | 184 | .name = "case", |
| 185 | .root_module = b.createModule(.{ | 185 | .root_module = b.createModule(.{ |
| 186 | .root_source_file = b.path("codegen/case_prop.zig"), | 186 | .root_source_file = b.path("codegen/case.zig"), |
| 187 | .target = b.graph.host, | 187 | .target = b.graph.host, |
| 188 | .optimize = .Debug, | 188 | .optimize = .Debug, |
| 189 | }), | 189 | }), |
| 190 | }); | 190 | }); |
| 191 | case_prop_gen_exe.root_module.addAnonymousImport("DerivedCoreProperties.txt", .{ .root_source_file = b.path("data/unicode/DerivedCoreProperties.txt") }); | 191 | case_gen_exe.root_module.addAnonymousImport("UnicodeData.txt", .{ .root_source_file = b.path("data/unicode/UnicodeData.txt") }); |
| 192 | const run_case_prop_gen_exe = b.addRunArtifact(case_prop_gen_exe); | ||
| 193 | const case_prop_gen_out = run_case_prop_gen_exe.addOutputFileArg("case_prop.bin.z"); | ||
| 194 | |||
| 195 | // Uppercase mappings | ||
| 196 | const upper_gen_exe = b.addExecutable(.{ | ||
| 197 | .name = "upper", | ||
| 198 | .root_module = b.createModule(.{ | ||
| 199 | .root_source_file = b.path("codegen/upper.zig"), | ||
| 200 | .target = b.graph.host, | ||
| 201 | .optimize = .Debug, | ||
| 202 | }), | ||
| 203 | }); | ||
| 204 | upper_gen_exe.root_module.addAnonymousImport("UnicodeData.txt", .{ .root_source_file = b.path("data/unicode/UnicodeData.txt") }); | ||
| 205 | const run_upper_gen_exe = b.addRunArtifact(upper_gen_exe); | ||
| 206 | const upper_gen_out = run_upper_gen_exe.addOutputFileArg("upper.bin.z"); | ||
| 207 | |||
| 208 | // Lowercase mappings | ||
| 209 | const lower_gen_exe = b.addExecutable(.{ | ||
| 210 | .name = "lower", | ||
| 211 | .root_module = b.createModule(.{ | ||
| 212 | .root_source_file = b.path("codegen/lower.zig"), | ||
| 213 | .target = b.graph.host, | ||
| 214 | .optimize = .Debug, | ||
| 215 | }), | ||
| 216 | }); | ||
| 217 | lower_gen_exe.root_module.addAnonymousImport("UnicodeData.txt", .{ .root_source_file = b.path("data/unicode/UnicodeData.txt") }); | ||
| 218 | const run_lower_gen_exe = b.addRunArtifact(lower_gen_exe); | ||
| 219 | const lower_gen_out = run_lower_gen_exe.addOutputFileArg("lower.bin.z"); | ||
| 220 | 192 | ||
| 193 | const run_case_gen_exe = b.addRunArtifact(case_gen_exe); | ||
| 194 | const case_gen_out = run_case_gen_exe.addOutputFileArg("case.zig"); | ||
| 221 | const scripts_gen_exe = b.addExecutable(.{ | 195 | const scripts_gen_exe = b.addExecutable(.{ |
| 222 | .name = "scripts", | 196 | .name = "scripts", |
| 223 | .root_module = b.createModule(.{ | 197 | .root_module = b.createModule(.{ |
| @@ -226,9 +200,10 @@ pub fn build(b: *std.Build) void { | |||
| 226 | .optimize = .Debug, | 200 | .optimize = .Debug, |
| 227 | }), | 201 | }), |
| 228 | }); | 202 | }); |
| 203 | |||
| 229 | scripts_gen_exe.root_module.addAnonymousImport("Scripts.txt", .{ .root_source_file = b.path("data/unicode/Scripts.txt") }); | 204 | scripts_gen_exe.root_module.addAnonymousImport("Scripts.txt", .{ .root_source_file = b.path("data/unicode/Scripts.txt") }); |
| 230 | const run_scripts_gen_exe = b.addRunArtifact(scripts_gen_exe); | 205 | const run_scripts_gen_exe = b.addRunArtifact(scripts_gen_exe); |
| 231 | const scripts_gen_out = run_scripts_gen_exe.addOutputFileArg("scripts.bin.z"); | 206 | const scripts_gen_out = run_scripts_gen_exe.addOutputFileArg("scripts.zig"); |
| 232 | 207 | ||
| 233 | const core_gen_exe = b.addExecutable(.{ | 208 | const core_gen_exe = b.addExecutable(.{ |
| 234 | .name = "core", | 209 | .name = "core", |
| @@ -240,7 +215,7 @@ pub fn build(b: *std.Build) void { | |||
| 240 | }); | 215 | }); |
| 241 | core_gen_exe.root_module.addAnonymousImport("DerivedCoreProperties.txt", .{ .root_source_file = b.path("data/unicode/DerivedCoreProperties.txt") }); | 216 | core_gen_exe.root_module.addAnonymousImport("DerivedCoreProperties.txt", .{ .root_source_file = b.path("data/unicode/DerivedCoreProperties.txt") }); |
| 242 | const run_core_gen_exe = b.addRunArtifact(core_gen_exe); | 217 | const run_core_gen_exe = b.addRunArtifact(core_gen_exe); |
| 243 | const core_gen_out = run_core_gen_exe.addOutputFileArg("core_props.bin.z"); | 218 | const core_gen_out = run_core_gen_exe.addOutputFileArg("core_props.zig"); |
| 244 | 219 | ||
| 245 | const props_gen_exe = b.addExecutable(.{ | 220 | const props_gen_exe = b.addExecutable(.{ |
| 246 | .name = "props", | 221 | .name = "props", |
| @@ -253,7 +228,7 @@ pub fn build(b: *std.Build) void { | |||
| 253 | 228 | ||
| 254 | props_gen_exe.root_module.addAnonymousImport("PropList.txt", .{ .root_source_file = b.path("data/unicode/PropList.txt") }); | 229 | props_gen_exe.root_module.addAnonymousImport("PropList.txt", .{ .root_source_file = b.path("data/unicode/PropList.txt") }); |
| 255 | const run_props_gen_exe = b.addRunArtifact(props_gen_exe); | 230 | const run_props_gen_exe = b.addRunArtifact(props_gen_exe); |
| 256 | const props_gen_out = run_props_gen_exe.addOutputFileArg("props.bin.z"); | 231 | const props_gen_out = run_props_gen_exe.addOutputFileArg("props.zig"); |
| 257 | 232 | ||
| 258 | // Modules we provide | 233 | // Modules we provide |
| 259 | 234 | ||
| @@ -457,9 +432,7 @@ pub fn build(b: *std.Build) void { | |||
| 457 | .optimize = optimize, | 432 | .optimize = optimize, |
| 458 | }); | 433 | }); |
| 459 | letter_case.addImport("code_point", code_point); | 434 | letter_case.addImport("code_point", code_point); |
| 460 | letter_case.addAnonymousImport("case_prop", .{ .root_source_file = case_prop_gen_out }); | 435 | letter_case.addAnonymousImport("case", .{ .root_source_file = case_gen_out }); |
| 461 | letter_case.addAnonymousImport("upper", .{ .root_source_file = upper_gen_out }); | ||
| 462 | letter_case.addAnonymousImport("lower", .{ .root_source_file = lower_gen_out }); | ||
| 463 | 436 | ||
| 464 | const letter_case_t = b.addTest(.{ | 437 | const letter_case_t = b.addTest(.{ |
| 465 | .name = "lettercase", | 438 | .name = "lettercase", |
| @@ -473,7 +446,7 @@ pub fn build(b: *std.Build) void { | |||
| 473 | .target = target, | 446 | .target = target, |
| 474 | .optimize = optimize, | 447 | .optimize = optimize, |
| 475 | }); | 448 | }); |
| 476 | scripts.addAnonymousImport("scripts", .{ .root_source_file = scripts_gen_out }); | 449 | scripts.addAnonymousImport("script", .{ .root_source_file = scripts_gen_out }); |
| 477 | 450 | ||
| 478 | const scripts_t = b.addTest(.{ | 451 | const scripts_t = b.addTest(.{ |
| 479 | .name = "scripts", | 452 | .name = "scripts", |
diff --git a/codegen/case.zig b/codegen/case.zig new file mode 100644 index 0000000..9dffc7c --- /dev/null +++ b/codegen/case.zig | |||
| @@ -0,0 +1,145 @@ | |||
| 1 | const std = @import("std"); | ||
| 2 | const builtin = @import("builtin"); | ||
| 3 | |||
| 4 | const block_size = 256; | ||
| 5 | const Block = [block_size]u44; | ||
| 6 | |||
| 7 | comptime { | ||
| 8 | if (@bitSizeOf(u44) != 2 * @bitSizeOf(u21) + 2) { | ||
| 9 | @compileError("u44 doesn't have expected bit size."); | ||
| 10 | } | ||
| 11 | } | ||
| 12 | |||
| 13 | const BlockMap = std.HashMap( | ||
| 14 | Block, | ||
| 15 | u16, | ||
| 16 | struct { | ||
| 17 | pub fn hash(_: @This(), k: Block) u64 { | ||
| 18 | var hasher = std.hash.Wyhash.init(0); | ||
| 19 | std.hash.autoHashStrat(&hasher, k, .DeepRecursive); | ||
| 20 | return hasher.final(); | ||
| 21 | } | ||
| 22 | |||
| 23 | pub fn eql(_: @This(), a: Block, b: Block) bool { | ||
| 24 | return std.mem.eql(u44, &a, &b); | ||
| 25 | } | ||
| 26 | }, | ||
| 27 | std.hash_map.default_max_load_percentage, | ||
| 28 | ); | ||
| 29 | |||
| 30 | pub fn main() !void { | ||
| 31 | var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); | ||
| 32 | defer arena.deinit(); | ||
| 33 | const allocator = arena.allocator(); | ||
| 34 | |||
| 35 | var lower_map = std.AutoHashMap(u21, u21).init(allocator); | ||
| 36 | defer lower_map.deinit(); | ||
| 37 | |||
| 38 | var upper_map = std.AutoHashMap(u21, u21).init(allocator); | ||
| 39 | defer upper_map.deinit(); | ||
| 40 | |||
| 41 | // Process UnicodeData.txt | ||
| 42 | |||
| 43 | var in_reader = std.io.Reader.fixed(@embedFile("UnicodeData.txt")); | ||
| 44 | while (in_reader.takeDelimiterInclusive('\n')) |line| { | ||
| 45 | if (line.len == 0) continue; | ||
| 46 | |||
| 47 | var field_iter = std.mem.splitScalar(u8, line, ';'); | ||
| 48 | var cp: u21 = undefined; | ||
| 49 | |||
| 50 | var i: usize = 0; | ||
| 51 | while (field_iter.next()) |field| : (i += 1) { | ||
| 52 | if (field.len == 0) continue; | ||
| 53 | |||
| 54 | switch (i) { | ||
| 55 | 0 => cp = try std.fmt.parseInt(u21, field, 16), | ||
| 56 | |||
| 57 | 12 => { | ||
| 58 | // Uppercase mapping | ||
| 59 | try upper_map.put(cp, try std.fmt.parseInt(u21, field, 16)); | ||
| 60 | }, | ||
| 61 | |||
| 62 | 13 => { | ||
| 63 | // Lowercase mapping | ||
| 64 | try lower_map.put(cp, try std.fmt.parseInt(u21, field, 16)); | ||
| 65 | }, | ||
| 66 | |||
| 67 | else => {}, | ||
| 68 | } | ||
| 69 | } | ||
| 70 | } else |err| switch (err) { | ||
| 71 | error.EndOfStream => {}, | ||
| 72 | else => { | ||
| 73 | return err; | ||
| 74 | }, | ||
| 75 | } | ||
| 76 | |||
| 77 | var blocks_map = BlockMap.init(allocator); | ||
| 78 | defer blocks_map.deinit(); | ||
| 79 | |||
| 80 | var stage1 = std.array_list.Managed(u16).init(allocator); | ||
| 81 | defer stage1.deinit(); | ||
| 82 | |||
| 83 | var stage2 = std.array_list.Managed(u44).init(allocator); | ||
| 84 | defer stage2.deinit(); | ||
| 85 | |||
| 86 | var block: Block = [_]u44{0} ** block_size; | ||
| 87 | var block_len: u16 = 0; | ||
| 88 | |||
| 89 | for (0..0x110000) |i| { | ||
| 90 | const cp: u21 = @intCast(i); | ||
| 91 | var case_prop: u44 = 0; | ||
| 92 | |||
| 93 | if (lower_map.get(cp)) |lower| { | ||
| 94 | case_prop |= @as(u44, lower) << 2 | 1; | ||
| 95 | } | ||
| 96 | |||
| 97 | if (upper_map.get(cp)) |upper| { | ||
| 98 | case_prop |= @as(u44, upper) << (2 + 21) | 2; | ||
| 99 | } | ||
| 100 | |||
| 101 | block[block_len] = case_prop; | ||
| 102 | block_len += 1; | ||
| 103 | |||
| 104 | if (block_len < block_size and cp != 0x10ffff) continue; | ||
| 105 | |||
| 106 | const gop = try blocks_map.getOrPut(block); | ||
| 107 | if (!gop.found_existing) { | ||
| 108 | gop.value_ptr.* = @intCast(stage2.items.len); | ||
| 109 | try stage2.appendSlice(&block); | ||
| 110 | } | ||
| 111 | |||
| 112 | try stage1.append(gop.value_ptr.*); | ||
| 113 | block_len = 0; | ||
| 114 | } | ||
| 115 | |||
| 116 | var args_iter = try std.process.argsWithAllocator(allocator); | ||
| 117 | defer args_iter.deinit(); | ||
| 118 | _ = args_iter.skip(); | ||
| 119 | const output_path = args_iter.next() orelse @panic("No output file arg!"); | ||
| 120 | |||
| 121 | var write_buf: [4096]u8 = undefined; | ||
| 122 | var out_file = try std.fs.cwd().createFile(output_path, .{}); | ||
| 123 | defer out_file.close(); | ||
| 124 | var writer = out_file.writer(&write_buf); | ||
| 125 | |||
| 126 | try writer.interface.print( | ||
| 127 | \\//! This file is auto-generated. Do not edit. | ||
| 128 | \\ | ||
| 129 | \\pub const s1: [{}]u16 = .{{ | ||
| 130 | , .{stage1.items.len}); | ||
| 131 | for (stage1.items) |entry| try writer.interface.print("{}, ", .{entry}); | ||
| 132 | |||
| 133 | try writer.interface.print( | ||
| 134 | \\ | ||
| 135 | \\}}; | ||
| 136 | \\ | ||
| 137 | \\pub const s2: [{}]u44 = .{{ | ||
| 138 | , .{stage2.items.len}); | ||
| 139 | for (stage2.items) |entry| try writer.interface.print("{}, ", .{entry}); | ||
| 140 | |||
| 141 | try writer.interface.writeAll( | ||
| 142 | \\}; | ||
| 143 | ); | ||
| 144 | try writer.interface.flush(); | ||
| 145 | } | ||
diff --git a/codegen/core_props.zig b/codegen/core_props.zig index 6ffdf91..99a55e2 100644 --- a/codegen/core_props.zig +++ b/codegen/core_props.zig | |||
| @@ -120,17 +120,29 @@ pub fn main() anyerror!void { | |||
| 120 | _ = args_iter.skip(); | 120 | _ = args_iter.skip(); |
| 121 | const output_path = args_iter.next() orelse @panic("No output file arg!"); | 121 | const output_path = args_iter.next() orelse @panic("No output file arg!"); |
| 122 | 122 | ||
| 123 | var out_buf: [4096]u8 = undefined; | 123 | var write_buf: [4096]u8 = undefined; |
| 124 | var out_file = try std.fs.cwd().createFile(output_path, .{}); | 124 | var out_file = try std.fs.cwd().createFile(output_path, .{}); |
| 125 | defer out_file.close(); | 125 | defer out_file.close(); |
| 126 | var writer = out_file.writer(&out_buf); | 126 | var writer = out_file.writer(&write_buf); |
| 127 | 127 | ||
| 128 | const endian = builtin.cpu.arch.endian(); | 128 | try writer.interface.print( |
| 129 | try writer.interface.writeInt(u16, @intCast(stage1.items.len), endian); | 129 | \\//! This file is auto-generated. Do not edit. |
| 130 | for (stage1.items) |i| try writer.interface.writeInt(u16, i, endian); | 130 | \\ |
| 131 | 131 | \\pub const s1: [{}]u16 = .{{ | |
| 132 | try writer.interface.writeInt(u16, @intCast(stage2.items.len), endian); | 132 | , .{stage1.items.len}); |
| 133 | try writer.interface.writeAll(stage2.items); | 133 | for (stage1.items) |entry| try writer.interface.print("{}, ", .{entry}); |
| 134 | |||
| 135 | try writer.interface.print( | ||
| 136 | \\ | ||
| 137 | \\}}; | ||
| 138 | \\ | ||
| 139 | \\pub const s2: [{}]u8 = .{{ | ||
| 140 | , .{stage2.items.len}); | ||
| 141 | for (stage2.items) |entry| try writer.interface.print("{}, ", .{entry}); | ||
| 142 | |||
| 143 | try writer.interface.writeAll( | ||
| 144 | \\}; | ||
| 145 | ); | ||
| 134 | 146 | ||
| 135 | try writer.interface.flush(); | 147 | try writer.interface.flush(); |
| 136 | } | 148 | } |
diff --git a/codegen/gencat.zig b/codegen/gencat.zig index 9800f1d..12c8373 100644 --- a/codegen/gencat.zig +++ b/codegen/gencat.zig | |||
| @@ -150,21 +150,38 @@ pub fn main() !void { | |||
| 150 | defer args_iter.deinit(); | 150 | defer args_iter.deinit(); |
| 151 | _ = args_iter.skip(); | 151 | _ = args_iter.skip(); |
| 152 | const output_path = args_iter.next() orelse @panic("No output file arg!"); | 152 | const output_path = args_iter.next() orelse @panic("No output file arg!"); |
| 153 | |||
| 154 | var write_buf: [4096]u8 = undefined; | 153 | var write_buf: [4096]u8 = undefined; |
| 155 | var out_file = try std.fs.cwd().createFile(output_path, .{}); | 154 | var out_file = try std.fs.cwd().createFile(output_path, .{}); |
| 156 | defer out_file.close(); | 155 | defer out_file.close(); |
| 157 | var writer = out_file.writer(&write_buf); | 156 | var writer = out_file.writer(&write_buf); |
| 158 | 157 | ||
| 159 | const endian = builtin.cpu.arch.endian(); | 158 | try writer.interface.print( |
| 160 | try writer.interface.writeInt(u16, @intCast(stage1.items.len), endian); | 159 | \\//! This file is auto-generated. Do not edit. |
| 161 | for (stage1.items) |i| try writer.interface.writeInt(u16, i, endian); | 160 | \\ |
| 162 | 161 | \\pub const s1: [{}]u16 = .{{ | |
| 163 | try writer.interface.writeInt(u16, @intCast(stage2.items.len), endian); | 162 | , .{stage1.items.len}); |
| 164 | for (stage2.items) |i| try writer.interface.writeInt(u8, i, endian); | 163 | for (stage1.items) |entry| try writer.interface.print("{}, ", .{entry}); |
| 165 | 164 | ||
| 166 | try writer.interface.writeInt(u8, @intCast(stage3.items.len), endian); | 165 | try writer.interface.print( |
| 167 | for (stage3.items) |i| try writer.interface.writeInt(u8, i, endian); | 166 | \\ |
| 167 | \\}}; | ||
| 168 | \\ | ||
| 169 | \\pub const stage2: [{}]u5 = .{{ | ||
| 170 | , .{stage2.items.len}); | ||
| 171 | for (stage2.items) |entry| try writer.interface.print("{}, ", .{entry}); | ||
| 172 | |||
| 173 | try writer.interface.print( | ||
| 174 | \\ | ||
| 175 | \\}}; | ||
| 176 | \\ | ||
| 177 | \\pub const stage3: [{}]5 = .{{ | ||
| 178 | , .{stage3.items.len}); | ||
| 179 | for (stage3.items) |entry| try writer.interface.print("{}, ", .{entry}); | ||
| 180 | |||
| 181 | try writer.interface.writeAll( | ||
| 182 | \\}; | ||
| 183 | ); | ||
| 168 | 184 | ||
| 169 | try writer.interface.flush(); | 185 | try writer.interface.flush(); |
| 186 | try writer.interface.flush(); | ||
| 170 | } | 187 | } |
diff --git a/codegen/numeric.zig b/codegen/numeric.zig index b304349..e7b4861 100644 --- a/codegen/numeric.zig +++ b/codegen/numeric.zig | |||
| @@ -123,12 +123,24 @@ pub fn main() anyerror!void { | |||
| 123 | defer out_file.close(); | 123 | defer out_file.close(); |
| 124 | var writer = out_file.writer(&write_buf); | 124 | var writer = out_file.writer(&write_buf); |
| 125 | 125 | ||
| 126 | const endian = builtin.cpu.arch.endian(); | 126 | try writer.interface.print( |
| 127 | try writer.interface.writeInt(u16, @intCast(stage1.items.len), endian); | 127 | \\//! This file is auto-generated. Do not edit. |
| 128 | for (stage1.items) |i| try writer.interface.writeInt(u16, i, endian); | 128 | \\ |
| 129 | 129 | \\pub const s1: [{}]u16 = .{{ | |
| 130 | try writer.interface.writeInt(u16, @intCast(stage2.items.len), endian); | 130 | , .{stage1.items.len}); |
| 131 | try writer.interface.writeAll(stage2.items); | 131 | for (stage1.items) |entry| try writer.interface.print("{}, ", .{entry}); |
| 132 | |||
| 133 | try writer.interface.print( | ||
| 134 | \\ | ||
| 135 | \\}}; | ||
| 136 | \\ | ||
| 137 | \\pub const s2: [{}]u8 = .{{ | ||
| 138 | , .{stage2.items.len}); | ||
| 139 | for (stage2.items) |entry| try writer.interface.print("{}, ", .{entry}); | ||
| 140 | |||
| 141 | try writer.interface.writeAll( | ||
| 142 | \\}; | ||
| 143 | ); | ||
| 132 | 144 | ||
| 133 | try writer.interface.flush(); | 145 | try writer.interface.flush(); |
| 134 | } | 146 | } |
diff --git a/codegen/props.zig b/codegen/props.zig index 35c7dfb..ebd5116 100644 --- a/codegen/props.zig +++ b/codegen/props.zig | |||
| @@ -123,11 +123,24 @@ pub fn main() anyerror!void { | |||
| 123 | defer out_file.close(); | 123 | defer out_file.close(); |
| 124 | var writer = out_file.writer(&write_buf); | 124 | var writer = out_file.writer(&write_buf); |
| 125 | 125 | ||
| 126 | const endian = builtin.cpu.arch.endian(); | 126 | try writer.interface.print( |
| 127 | try writer.interface.writeInt(u16, @intCast(stage1.items.len), endian); | 127 | \\//! This file is auto-generated. Do not edit. |
| 128 | for (stage1.items) |i| try writer.interface.writeInt(u16, i, endian); | 128 | \\ |
| 129 | \\pub const s1: [{}]u16 = .{{ | ||
| 130 | , .{stage1.items.len}); | ||
| 131 | for (stage1.items) |entry| try writer.interface.print("{}, ", .{entry}); | ||
| 132 | |||
| 133 | try writer.interface.print( | ||
| 134 | \\ | ||
| 135 | \\}}; | ||
| 136 | \\ | ||
| 137 | \\pub const s2: [{}]u8 = .{{ | ||
| 138 | , .{stage2.items.len}); | ||
| 139 | for (stage2.items) |entry| try writer.interface.print("{}, ", .{entry}); | ||
| 140 | |||
| 141 | try writer.interface.writeAll( | ||
| 142 | \\}; | ||
| 143 | ); | ||
| 129 | 144 | ||
| 130 | try writer.interface.writeInt(u16, @intCast(stage2.items.len), endian); | ||
| 131 | try writer.interface.writeAll(stage2.items); | ||
| 132 | try writer.interface.flush(); | 145 | try writer.interface.flush(); |
| 133 | } | 146 | } |
diff --git a/codegen/scripts.zig b/codegen/scripts.zig index 0f0194c..6bd5866 100644 --- a/codegen/scripts.zig +++ b/codegen/scripts.zig | |||
| @@ -299,15 +299,32 @@ pub fn main() anyerror!void { | |||
| 299 | defer out_file.close(); | 299 | defer out_file.close(); |
| 300 | var writer = out_file.writer(&write_buf); | 300 | var writer = out_file.writer(&write_buf); |
| 301 | 301 | ||
| 302 | const endian = builtin.cpu.arch.endian(); | 302 | try writer.interface.print( |
| 303 | try writer.interface.writeInt(u16, @intCast(stage1.items.len), endian); | 303 | \\//! This file is auto-generated. Do not edit. |
| 304 | for (stage1.items) |i| try writer.interface.writeInt(u16, i, endian); | 304 | \\ |
| 305 | \\pub const s1: [{}]u16 = .{{ | ||
| 306 | , .{stage1.items.len}); | ||
| 307 | for (stage1.items) |entry| try writer.interface.print("{}, ", .{entry}); | ||
| 305 | 308 | ||
| 306 | try writer.interface.writeInt(u16, @intCast(stage2.items.len), endian); | 309 | try writer.interface.print( |
| 307 | for (stage2.items) |i| try writer.interface.writeInt(u8, i, endian); | 310 | \\ |
| 311 | \\}}; | ||
| 312 | \\ | ||
| 313 | \\pub const s2: [{}]u8 = .{{ | ||
| 314 | , .{stage2.items.len}); | ||
| 315 | for (stage2.items) |entry| try writer.interface.print("{}, ", .{entry}); | ||
| 308 | 316 | ||
| 309 | try writer.interface.writeInt(u8, @intCast(stage3.items.len), endian); | 317 | try writer.interface.print( |
| 310 | for (stage3.items) |i| try writer.interface.writeInt(u8, i, endian); | 318 | \\ |
| 319 | \\}}; | ||
| 320 | \\ | ||
| 321 | \\pub const s3: [{}]u8 = .{{ | ||
| 322 | , .{stage3.items.len}); | ||
| 323 | for (stage3.items) |entry| try writer.interface.print("{}, ", .{entry}); | ||
| 324 | |||
| 325 | try writer.interface.writeAll( | ||
| 326 | \\}; | ||
| 327 | ); | ||
| 311 | 328 | ||
| 312 | try writer.interface.flush(); | 329 | try writer.interface.flush(); |
| 313 | } | 330 | } |
diff --git a/src/GeneralCategories.zig b/src/GeneralCategories.zig index eee7e56..9a383bf 100644 --- a/src/GeneralCategories.zig +++ b/src/GeneralCategories.zig | |||
| @@ -1,8 +1,19 @@ | |||
| 1 | //! General Categories | 1 | //! General Categories |
| 2 | 2 | ||
| 3 | s1: []u16 = undefined, | 3 | const Data = struct { |
| 4 | s2: []u5 = undefined, | 4 | s1: []const u16 = undefined, |
| 5 | s3: []u5 = undefined, | 5 | s2: []const u5 = undefined, |
| 6 | s3: []const u5 = undefined, | ||
| 7 | }; | ||
| 8 | |||
| 9 | const general_categories = general_categories: { | ||
| 10 | const data = @import("gencat"); | ||
| 11 | break :general_categories Data{ | ||
| 12 | .s1 = &data.s1, | ||
| 13 | .s2 = &data.s2, | ||
| 14 | .s3 = &data.s3, | ||
| 15 | }; | ||
| 16 | }; | ||
| 6 | 17 | ||
| 7 | /// General Category | 18 | /// General Category |
| 8 | pub const Gc = enum { | 19 | pub const Gc = enum { |
| @@ -38,51 +49,14 @@ pub const Gc = enum { | |||
| 38 | Zs, // Separator, Space | 49 | Zs, // Separator, Space |
| 39 | }; | 50 | }; |
| 40 | 51 | ||
| 41 | const GeneralCategories = @This(); | ||
| 42 | |||
| 43 | pub fn init(allocator: Allocator) Allocator.Error!GeneralCategories { | ||
| 44 | var gencat = GeneralCategories{}; | ||
| 45 | try gencat.setup(allocator); | ||
| 46 | return gencat; | ||
| 47 | } | ||
| 48 | |||
| 49 | pub fn setup(gencat: *GeneralCategories, allocator: Allocator) Allocator.Error!void { | ||
| 50 | const in_bytes = @embedFile("gencat"); | ||
| 51 | var in_fbs = std.io.fixedBufferStream(in_bytes); | ||
| 52 | var reader = in_fbs.reader(); | ||
| 53 | |||
| 54 | const endian = builtin.cpu.arch.endian(); | ||
| 55 | |||
| 56 | const s1_len: u16 = reader.readInt(u16, endian) catch unreachable; | ||
| 57 | gencat.s1 = try allocator.alloc(u16, s1_len); | ||
| 58 | errdefer allocator.free(gencat.s1); | ||
| 59 | for (0..s1_len) |i| gencat.s1[i] = reader.readInt(u16, endian) catch unreachable; | ||
| 60 | |||
| 61 | const s2_len: u16 = reader.readInt(u16, endian) catch unreachable; | ||
| 62 | gencat.s2 = try allocator.alloc(u5, s2_len); | ||
| 63 | errdefer allocator.free(gencat.s2); | ||
| 64 | for (0..s2_len) |i| gencat.s2[i] = @intCast(reader.readInt(u8, endian) catch unreachable); | ||
| 65 | |||
| 66 | const s3_len: u16 = reader.readInt(u8, endian) catch unreachable; | ||
| 67 | gencat.s3 = try allocator.alloc(u5, s3_len); | ||
| 68 | errdefer allocator.free(gencat.s3); | ||
| 69 | for (0..s3_len) |i| gencat.s3[i] = @intCast(reader.readInt(u8, endian) catch unreachable); | ||
| 70 | } | ||
| 71 | |||
| 72 | pub fn deinit(gencat: *const GeneralCategories, allocator: mem.Allocator) void { | ||
| 73 | allocator.free(gencat.s1); | ||
| 74 | allocator.free(gencat.s2); | ||
| 75 | allocator.free(gencat.s3); | ||
| 76 | } | ||
| 77 | |||
| 78 | /// Lookup the General Category for `cp`. | 52 | /// Lookup the General Category for `cp`. |
| 79 | pub fn gc(gencat: GeneralCategories, cp: u21) Gc { | 53 | pub fn gc(cp: u21) Gc { |
| 80 | return @enumFromInt(gencat.s3[gencat.s2[gencat.s1[cp >> 8] + (cp & 0xff)]]); | 54 | return @enumFromInt(general_categories.s3[general_categories.s2[general_categories.s1[cp >> 8] + (cp & 0xff)]]); |
| 81 | } | 55 | } |
| 82 | 56 | ||
| 83 | /// True if `cp` has a C general category. | 57 | /// True if `cp` has a C general category. |
| 84 | pub fn isControl(gencat: GeneralCategories, cp: u21) bool { | 58 | pub fn isControl(cp: u21) bool { |
| 85 | return switch (gencat.gc(cp)) { | 59 | return switch (gc(cp)) { |
| 86 | .Cc, | 60 | .Cc, |
| 87 | .Cf, | 61 | .Cf, |
| 88 | .Cn, | 62 | .Cn, |
| @@ -94,8 +68,8 @@ pub fn isControl(gencat: GeneralCategories, cp: u21) bool { | |||
| 94 | } | 68 | } |
| 95 | 69 | ||
| 96 | /// True if `cp` has an L general category. | 70 | /// True if `cp` has an L general category. |
| 97 | pub fn isLetter(gencat: GeneralCategories, cp: u21) bool { | 71 | pub fn isLetter(cp: u21) bool { |
| 98 | return switch (gencat.gc(cp)) { | 72 | return switch (gc(cp)) { |
| 99 | .Ll, | 73 | .Ll, |
| 100 | .Lm, | 74 | .Lm, |
| 101 | .Lo, | 75 | .Lo, |
| @@ -107,8 +81,8 @@ pub fn isLetter(gencat: GeneralCategories, cp: u21) bool { | |||
| 107 | } | 81 | } |
| 108 | 82 | ||
| 109 | /// True if `cp` has an M general category. | 83 | /// True if `cp` has an M general category. |
| 110 | pub fn isMark(gencat: GeneralCategories, cp: u21) bool { | 84 | pub fn isMark(cp: u21) bool { |
| 111 | return switch (gencat.gc(cp)) { | 85 | return switch (gc(cp)) { |
| 112 | .Mc, | 86 | .Mc, |
| 113 | .Me, | 87 | .Me, |
| 114 | .Mn, | 88 | .Mn, |
| @@ -118,8 +92,8 @@ pub fn isMark(gencat: GeneralCategories, cp: u21) bool { | |||
| 118 | } | 92 | } |
| 119 | 93 | ||
| 120 | /// True if `cp` has an N general category. | 94 | /// True if `cp` has an N general category. |
| 121 | pub fn isNumber(gencat: GeneralCategories, cp: u21) bool { | 95 | pub fn isNumber(cp: u21) bool { |
| 122 | return switch (gencat.gc(cp)) { | 96 | return switch (gc(cp)) { |
| 123 | .Nd, | 97 | .Nd, |
| 124 | .Nl, | 98 | .Nl, |
| 125 | .No, | 99 | .No, |
| @@ -129,8 +103,8 @@ pub fn isNumber(gencat: GeneralCategories, cp: u21) bool { | |||
| 129 | } | 103 | } |
| 130 | 104 | ||
| 131 | /// True if `cp` has a P general category. | 105 | /// True if `cp` has a P general category. |
| 132 | pub fn isPunctuation(gencat: GeneralCategories, cp: u21) bool { | 106 | pub fn isPunctuation(cp: u21) bool { |
| 133 | return switch (gencat.gc(cp)) { | 107 | return switch (gc(cp)) { |
| 134 | .Pc, | 108 | .Pc, |
| 135 | .Pd, | 109 | .Pd, |
| 136 | .Pe, | 110 | .Pe, |
| @@ -144,8 +118,8 @@ pub fn isPunctuation(gencat: GeneralCategories, cp: u21) bool { | |||
| 144 | } | 118 | } |
| 145 | 119 | ||
| 146 | /// True if `cp` has an S general category. | 120 | /// True if `cp` has an S general category. |
| 147 | pub fn isSymbol(gencat: GeneralCategories, cp: u21) bool { | 121 | pub fn isSymbol(cp: u21) bool { |
| 148 | return switch (gencat.gc(cp)) { | 122 | return switch (gc(cp)) { |
| 149 | .Sc, | 123 | .Sc, |
| 150 | .Sk, | 124 | .Sk, |
| 151 | .Sm, | 125 | .Sm, |
| @@ -156,8 +130,8 @@ pub fn isSymbol(gencat: GeneralCategories, cp: u21) bool { | |||
| 156 | } | 130 | } |
| 157 | 131 | ||
| 158 | /// True if `cp` has a Z general category. | 132 | /// True if `cp` has a Z general category. |
| 159 | pub fn isSeparator(gencat: GeneralCategories, cp: u21) bool { | 133 | pub fn isSeparator(cp: u21) bool { |
| 160 | return switch (gencat.gc(cp)) { | 134 | return switch (gc(cp)) { |
| 161 | .Zl, | 135 | .Zl, |
| 162 | .Zp, | 136 | .Zp, |
| 163 | .Zs, | 137 | .Zs, |
| @@ -165,19 +139,3 @@ pub fn isSeparator(gencat: GeneralCategories, cp: u21) bool { | |||
| 165 | else => false, | 139 | else => false, |
| 166 | }; | 140 | }; |
| 167 | } | 141 | } |
| 168 | |||
| 169 | fn testAllocator(allocator: Allocator) !void { | ||
| 170 | var gen_cat = try GeneralCategories.init(allocator); | ||
| 171 | gen_cat.deinit(allocator); | ||
| 172 | } | ||
| 173 | |||
| 174 | test "Allocation failure" { | ||
| 175 | try testing.checkAllAllocationFailures(testing.allocator, testAllocator, .{}); | ||
| 176 | } | ||
| 177 | |||
| 178 | const std = @import("std"); | ||
| 179 | const builtin = @import("builtin"); | ||
| 180 | const compress = std.compress; | ||
| 181 | const mem = std.mem; | ||
| 182 | const testing = std.testing; | ||
| 183 | const Allocator = mem.Allocator; | ||
diff --git a/src/LetterCasing.zig b/src/LetterCasing.zig index 33096fc..24b67a0 100644 --- a/src/LetterCasing.zig +++ b/src/LetterCasing.zig | |||
| @@ -1,120 +1,58 @@ | |||
| 1 | const CodePointIterator = @import("code_point").Iterator; | 1 | const CodePointIterator = @import("code_point").Iterator; |
| 2 | 2 | const GeneralCategories = @import("GeneralCategories"); | |
| 3 | case_map: [][2]u21 = undefined, | 3 | |
| 4 | prop_s1: []u16 = undefined, | 4 | const Data = struct { |
| 5 | prop_s2: []u8 = undefined, | 5 | s1: []const u16 = undefined, |
| 6 | 6 | s2: []const u44 = undefined, | |
| 7 | const LetterCasing = @This(); | 7 | }; |
| 8 | 8 | ||
| 9 | pub fn init(allocator: Allocator) Allocator.Error!LetterCasing { | 9 | const letter_casing = letter_casing: { |
| 10 | var case = LetterCasing{}; | 10 | const data = @import("case"); |
| 11 | try case.setup(allocator); | 11 | break :letter_casing Data{ |
| 12 | return case; | 12 | .s1 = &data.s1, |
| 13 | } | 13 | .s2 = &data.s2, |
| 14 | |||
| 15 | pub fn setup(case: *LetterCasing, allocator: Allocator) Allocator.Error!void { | ||
| 16 | case.setupInner(allocator) catch |err| { | ||
| 17 | switch (err) { | ||
| 18 | error.OutOfMemory => |e| return e, | ||
| 19 | else => unreachable, | ||
| 20 | } | ||
| 21 | }; | 14 | }; |
| 22 | } | 15 | }; |
| 23 | |||
| 24 | inline fn setupInner(self: *LetterCasing, allocator: mem.Allocator) !void { | ||
| 25 | const endian = builtin.cpu.arch.endian(); | ||
| 26 | |||
| 27 | self.case_map = try allocator.alloc([2]u21, 0x110000); | ||
| 28 | errdefer allocator.free(self.case_map); | ||
| 29 | |||
| 30 | for (0..0x110000) |i| { | ||
| 31 | const cp: u21 = @intCast(i); | ||
| 32 | self.case_map[cp] = .{ cp, cp }; | ||
| 33 | } | ||
| 34 | |||
| 35 | // Uppercase | ||
| 36 | const upper_bytes = @embedFile("upper"); | ||
| 37 | var upper_fbs = std.io.fixedBufferStream(upper_bytes); | ||
| 38 | var upper_reader = upper_fbs.reader(); | ||
| 39 | |||
| 40 | while (true) { | ||
| 41 | const cp = try upper_reader.readInt(i24, endian); | ||
| 42 | if (cp == 0) break; | ||
| 43 | const diff = try upper_reader.readInt(i24, endian); | ||
| 44 | self.case_map[@intCast(cp)][0] = @intCast(cp + diff); | ||
| 45 | } | ||
| 46 | |||
| 47 | // Lowercase | ||
| 48 | const lower_bytes = @embedFile("lower"); | ||
| 49 | var lower_fbs = std.io.fixedBufferStream(lower_bytes); | ||
| 50 | var lower_reader = lower_fbs.reader(); | ||
| 51 | |||
| 52 | while (true) { | ||
| 53 | const cp = try lower_reader.readInt(i24, endian); | ||
| 54 | if (cp == 0) break; | ||
| 55 | const diff = try lower_reader.readInt(i24, endian); | ||
| 56 | self.case_map[@intCast(cp)][1] = @intCast(cp + diff); | ||
| 57 | } | ||
| 58 | |||
| 59 | // Case properties | ||
| 60 | const cp_bytes = @embedFile("case_prop"); | ||
| 61 | var cp_fbs = std.io.fixedBufferStream(cp_bytes); | ||
| 62 | var cp_reader = cp_fbs.reader(); | ||
| 63 | |||
| 64 | const stage_1_len: u16 = try cp_reader.readInt(u16, endian); | ||
| 65 | self.prop_s1 = try allocator.alloc(u16, stage_1_len); | ||
| 66 | errdefer allocator.free(self.prop_s1); | ||
| 67 | for (0..stage_1_len) |i| self.prop_s1[i] = try cp_reader.readInt(u16, endian); | ||
| 68 | |||
| 69 | const stage_2_len: u16 = try cp_reader.readInt(u16, endian); | ||
| 70 | self.prop_s2 = try allocator.alloc(u8, stage_2_len); | ||
| 71 | errdefer allocator.free(self.prop_s2); | ||
| 72 | _ = try cp_reader.readAll(self.prop_s2); | ||
| 73 | } | ||
| 74 | |||
| 75 | pub fn deinit(self: *const LetterCasing, allocator: mem.Allocator) void { | ||
| 76 | allocator.free(self.case_map); | ||
| 77 | allocator.free(self.prop_s1); | ||
| 78 | allocator.free(self.prop_s2); | ||
| 79 | } | ||
| 80 | 16 | ||
| 81 | // Returns true if `cp` is either upper, lower, or title case. | 17 | // Returns true if `cp` is either upper, lower, or title case. |
| 82 | pub fn isCased(self: LetterCasing, cp: u21) bool { | 18 | pub fn isCased(cp: u21) bool { |
| 83 | return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; | 19 | return isUpper(cp) or isLower(cp) or GeneralCategories.gc(cp) == .Lt; |
| 84 | } | 20 | } |
| 85 | 21 | ||
| 86 | // Returns true if `cp` is uppercase. | 22 | // Returns true if `cp` is uppercase. |
| 87 | pub fn isUpper(self: LetterCasing, cp: u21) bool { | 23 | pub fn isUpper(cp: u21) bool { |
| 88 | return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; | 24 | // isUpper is true if we have a mapping to a lower character (bit 1) |
| 25 | return letter_casing.s2[letter_casing.s1[cp >> 8] + (cp & 0xff)] & 1 == 1; | ||
| 89 | } | 26 | } |
| 90 | 27 | ||
| 91 | /// Returns true if `str` is all uppercase. | 28 | /// Returns true if `str` is all non-lowercase. |
| 92 | pub fn isUpperStr(self: LetterCasing, str: []const u8) bool { | 29 | pub fn isUpperStr(str: []const u8) bool { |
| 93 | var iter = CodePointIterator{ .bytes = str }; | 30 | var iter = CodePointIterator{ .bytes = str }; |
| 94 | 31 | ||
| 95 | return while (iter.next()) |cp| { | 32 | return while (iter.next()) |cp| { |
| 96 | if (self.isCased(cp.code) and !self.isUpper(cp.code)) break false; | 33 | if (isLower(cp.code)) break false; |
| 97 | } else true; | 34 | } else true; |
| 98 | } | 35 | } |
| 99 | 36 | ||
| 100 | test "isUpperStr" { | 37 | test "isUpperStr" { |
| 101 | const cd = try init(testing.allocator); | 38 | try testing.expect(isUpperStr("HELLO, WORLD 2112!")); |
| 102 | defer cd.deinit(testing.allocator); | 39 | try testing.expect(!isUpperStr("hello, world 2112!")); |
| 103 | 40 | try testing.expect(!isUpperStr("Hello, World 2112!")); | |
| 104 | try testing.expect(cd.isUpperStr("HELLO, WORLD 2112!")); | ||
| 105 | try testing.expect(!cd.isUpperStr("hello, world 2112!")); | ||
| 106 | try testing.expect(!cd.isUpperStr("Hello, World 2112!")); | ||
| 107 | } | 41 | } |
| 108 | 42 | ||
| 109 | /// Returns uppercase mapping for `cp`. | 43 | /// Returns uppercase mapping for `cp`. |
| 110 | pub fn toUpper(self: LetterCasing, cp: u21) u21 { | 44 | pub fn toUpper(cp: u21) u21 { |
| 111 | return self.case_map[cp][0]; | 45 | const case_prop = letter_casing.s2[letter_casing.s1[cp >> 8] + (cp & 0xff)]; |
| 46 | if (case_prop & 2 == 2) { | ||
| 47 | return @intCast(case_prop >> (21 + 2)); | ||
| 48 | } else { | ||
| 49 | return cp; | ||
| 50 | } | ||
| 112 | } | 51 | } |
| 113 | 52 | ||
| 114 | /// Returns a new string with all letters in uppercase. | 53 | /// Returns a new string with all letters in uppercase. |
| 115 | /// Caller must free returned bytes with `allocator`. | 54 | /// Caller must free returned bytes with `allocator`. |
| 116 | pub fn toUpperStr( | 55 | pub fn toUpperStr( |
| 117 | self: LetterCasing, | ||
| 118 | allocator: mem.Allocator, | 56 | allocator: mem.Allocator, |
| 119 | str: []const u8, | 57 | str: []const u8, |
| 120 | ) ![]u8 { | 58 | ) ![]u8 { |
| @@ -125,7 +63,7 @@ pub fn toUpperStr( | |||
| 125 | var buf: [4]u8 = undefined; | 63 | var buf: [4]u8 = undefined; |
| 126 | 64 | ||
| 127 | while (iter.next()) |cp| { | 65 | while (iter.next()) |cp| { |
| 128 | const len = try unicode.utf8Encode(self.toUpper(cp.code), &buf); | 66 | const len = try unicode.utf8Encode(toUpper(cp.code), &buf); |
| 129 | try bytes.appendSlice(buf[0..len]); | 67 | try bytes.appendSlice(buf[0..len]); |
| 130 | } | 68 | } |
| 131 | 69 | ||
| @@ -133,46 +71,45 @@ pub fn toUpperStr( | |||
| 133 | } | 71 | } |
| 134 | 72 | ||
| 135 | test "toUpperStr" { | 73 | test "toUpperStr" { |
| 136 | const cd = try init(testing.allocator); | 74 | const uppered = try toUpperStr(testing.allocator, "Hello, World 2112!"); |
| 137 | defer cd.deinit(testing.allocator); | ||
| 138 | |||
| 139 | const uppered = try cd.toUpperStr(testing.allocator, "Hello, World 2112!"); | ||
| 140 | defer testing.allocator.free(uppered); | 75 | defer testing.allocator.free(uppered); |
| 141 | try testing.expectEqualStrings("HELLO, WORLD 2112!", uppered); | 76 | try testing.expectEqualStrings("HELLO, WORLD 2112!", uppered); |
| 142 | } | 77 | } |
| 143 | 78 | ||
| 144 | // Returns true if `cp` is lowercase. | 79 | // Returns true if `cp` is lowercase. |
| 145 | pub fn isLower(self: LetterCasing, cp: u21) bool { | 80 | pub fn isLower(cp: u21) bool { |
| 146 | return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; | 81 | // isLower is true if we have a mapping to an upper character (bit 2) |
| 82 | return letter_casing.s2[letter_casing.s1[cp >> 8] + (cp & 0xff)] & 2 == 2; | ||
| 147 | } | 83 | } |
| 148 | 84 | ||
| 149 | /// Returns true if `str` is all lowercase. | 85 | /// Returns true if `str` is all non-uppercase. |
| 150 | pub fn isLowerStr(self: LetterCasing, str: []const u8) bool { | 86 | pub fn isLowerStr(str: []const u8) bool { |
| 151 | var iter = CodePointIterator{ .bytes = str }; | 87 | var iter = CodePointIterator{ .bytes = str }; |
| 152 | 88 | ||
| 153 | return while (iter.next()) |cp| { | 89 | return while (iter.next()) |cp| { |
| 154 | if (self.isCased(cp.code) and !self.isLower(cp.code)) break false; | 90 | if (isUpper(cp.code)) break false; |
| 155 | } else true; | 91 | } else true; |
| 156 | } | 92 | } |
| 157 | 93 | ||
| 158 | test "isLowerStr" { | 94 | test "isLowerStr" { |
| 159 | const cd = try init(testing.allocator); | 95 | try testing.expect(isLowerStr("hello, world 2112!")); |
| 160 | defer cd.deinit(testing.allocator); | 96 | try testing.expect(!isLowerStr("HELLO, WORLD 2112!")); |
| 161 | 97 | try testing.expect(!isLowerStr("Hello, World 2112!")); | |
| 162 | try testing.expect(cd.isLowerStr("hello, world 2112!")); | ||
| 163 | try testing.expect(!cd.isLowerStr("HELLO, WORLD 2112!")); | ||
| 164 | try testing.expect(!cd.isLowerStr("Hello, World 2112!")); | ||
| 165 | } | 98 | } |
| 166 | 99 | ||
| 167 | /// Returns lowercase mapping for `cp`. | 100 | /// Returns lowercase mapping for `cp`. |
| 168 | pub fn toLower(self: LetterCasing, cp: u21) u21 { | 101 | pub fn toLower(cp: u21) u21 { |
| 169 | return self.case_map[cp][1]; | 102 | const case_prop = letter_casing.s2[letter_casing.s1[cp >> 8] + (cp & 0xff)]; |
| 103 | if (case_prop & 1 == 1) { | ||
| 104 | return @intCast((case_prop >> 2) & 0x1FFFFF); | ||
| 105 | } else { | ||
| 106 | return cp; | ||
| 107 | } | ||
| 170 | } | 108 | } |
| 171 | 109 | ||
| 172 | /// Returns a new string with all letters in lowercase. | 110 | /// Returns a new string with all letters in lowercase. |
| 173 | /// Caller must free returned bytes with `allocator`. | 111 | /// Caller must free returned bytes with `allocator`. |
| 174 | pub fn toLowerStr( | 112 | pub fn toLowerStr( |
| 175 | self: LetterCasing, | ||
| 176 | allocator: mem.Allocator, | 113 | allocator: mem.Allocator, |
| 177 | str: []const u8, | 114 | str: []const u8, |
| 178 | ) ![]u8 { | 115 | ) ![]u8 { |
| @@ -183,7 +120,7 @@ pub fn toLowerStr( | |||
| 183 | var buf: [4]u8 = undefined; | 120 | var buf: [4]u8 = undefined; |
| 184 | 121 | ||
| 185 | while (iter.next()) |cp| { | 122 | while (iter.next()) |cp| { |
| 186 | const len = try unicode.utf8Encode(self.toLower(cp.code), &buf); | 123 | const len = try unicode.utf8Encode(toLower(cp.code), &buf); |
| 187 | try bytes.appendSlice(buf[0..len]); | 124 | try bytes.appendSlice(buf[0..len]); |
| 188 | } | 125 | } |
| 189 | 126 | ||
| @@ -191,27 +128,13 @@ pub fn toLowerStr( | |||
| 191 | } | 128 | } |
| 192 | 129 | ||
| 193 | test "toLowerStr" { | 130 | test "toLowerStr" { |
| 194 | const cd = try init(testing.allocator); | 131 | const lowered = try toLowerStr(testing.allocator, "Hello, World 2112!"); |
| 195 | defer cd.deinit(testing.allocator); | ||
| 196 | |||
| 197 | const lowered = try cd.toLowerStr(testing.allocator, "Hello, World 2112!"); | ||
| 198 | defer testing.allocator.free(lowered); | 132 | defer testing.allocator.free(lowered); |
| 199 | try testing.expectEqualStrings("hello, world 2112!", lowered); | 133 | try testing.expectEqualStrings("hello, world 2112!", lowered); |
| 200 | } | 134 | } |
| 201 | 135 | ||
| 202 | fn testAllocator(allocator: Allocator) !void { | ||
| 203 | var prop = try LetterCasing.init(allocator); | ||
| 204 | prop.deinit(allocator); | ||
| 205 | } | ||
| 206 | |||
| 207 | test "Allocation failure" { | ||
| 208 | try testing.checkAllAllocationFailures(testing.allocator, testAllocator, .{}); | ||
| 209 | } | ||
| 210 | |||
| 211 | const std = @import("std"); | 136 | const std = @import("std"); |
| 212 | const builtin = @import("builtin"); | 137 | const builtin = @import("builtin"); |
| 213 | const compress = std.compress; | ||
| 214 | const mem = std.mem; | 138 | const mem = std.mem; |
| 215 | const Allocator = std.mem.Allocator; | ||
| 216 | const testing = std.testing; | 139 | const testing = std.testing; |
| 217 | const unicode = std.unicode; | 140 | const unicode = std.unicode; |
diff --git a/src/Properties.zig b/src/Properties.zig index 432d176..f8c7cfc 100644 --- a/src/Properties.zig +++ b/src/Properties.zig | |||
| @@ -1,177 +1,108 @@ | |||
| 1 | //! Properties module | 1 | //! Properties module |
| 2 | 2 | ||
| 3 | core_s1: []u16 = undefined, | 3 | const Data = struct { |
| 4 | core_s2: []u8 = undefined, | 4 | core_s1: []const u16 = undefined, |
| 5 | props_s1: []u16 = undefined, | 5 | core_s2: []const u8 = undefined, |
| 6 | props_s2: []u8 = undefined, | 6 | props_s1: []const u16 = undefined, |
| 7 | num_s1: []u16 = undefined, | 7 | props_s2: []const u8 = undefined, |
| 8 | num_s2: []u8 = undefined, | 8 | num_s1: []const u16 = undefined, |
| 9 | 9 | num_s2: []const u8 = undefined, | |
| 10 | const Properties = @This(); | 10 | }; |
| 11 | 11 | ||
| 12 | pub fn init(allocator: Allocator) Allocator.Error!Properties { | 12 | const properties = properties: { |
| 13 | var props = Properties{}; | 13 | const core_props = @import("core_props"); |
| 14 | try props.setup(allocator); | 14 | const props_data = @import("props"); |
| 15 | return props; | 15 | const numeric = @import("numeric"); |
| 16 | } | 16 | break :properties Data{ |
| 17 | 17 | .core_s1 = &core_props.s1, | |
| 18 | pub fn setup(props: *Properties, allocator: Allocator) Allocator.Error!void { | 18 | .core_s2 = &core_props.s2, |
| 19 | props.setupInner(allocator) catch |err| { | 19 | .props_s1 = &props_data.s1, |
| 20 | switch (err) { | 20 | .props_s2 = &props_data.s2, |
| 21 | error.OutOfMemory => |e| return e, | 21 | .num_s1 = &numeric.s1, |
| 22 | else => unreachable, | 22 | .num_s2 = &numeric.s2, |
| 23 | } | ||
| 24 | }; | 23 | }; |
| 25 | } | 24 | }; |
| 26 | |||
| 27 | inline fn setupInner(props: *Properties, allocator: Allocator) !void { | ||
| 28 | const endian = builtin.cpu.arch.endian(); | ||
| 29 | |||
| 30 | // Process DerivedCoreProperties.txt | ||
| 31 | const core_bytes = @embedFile("core_props"); | ||
| 32 | var core_fbs = std.io.fixedBufferStream(core_bytes); | ||
| 33 | var core_reader = core_fbs.reader(); | ||
| 34 | |||
| 35 | const core_stage_1_len: u16 = try core_reader.readInt(u16, endian); | ||
| 36 | props.core_s1 = try allocator.alloc(u16, core_stage_1_len); | ||
| 37 | errdefer allocator.free(props.core_s1); | ||
| 38 | for (0..core_stage_1_len) |i| props.core_s1[i] = try core_reader.readInt(u16, endian); | ||
| 39 | |||
| 40 | const core_stage_2_len: u16 = try core_reader.readInt(u16, endian); | ||
| 41 | props.core_s2 = try allocator.alloc(u8, core_stage_2_len); | ||
| 42 | errdefer allocator.free(props.core_s2); | ||
| 43 | _ = try core_reader.readAll(props.core_s2); | ||
| 44 | |||
| 45 | // Process PropList.txt | ||
| 46 | const props_bytes = @embedFile("props"); | ||
| 47 | var props_fbs = std.io.fixedBufferStream(props_bytes); | ||
| 48 | var props_reader = props_fbs.reader(); | ||
| 49 | |||
| 50 | const stage_1_len: u16 = try props_reader.readInt(u16, endian); | ||
| 51 | props.props_s1 = try allocator.alloc(u16, stage_1_len); | ||
| 52 | errdefer allocator.free(props.props_s1); | ||
| 53 | for (0..stage_1_len) |i| props.props_s1[i] = try props_reader.readInt(u16, endian); | ||
| 54 | |||
| 55 | const stage_2_len: u16 = try props_reader.readInt(u16, endian); | ||
| 56 | props.props_s2 = try allocator.alloc(u8, stage_2_len); | ||
| 57 | errdefer allocator.free(props.props_s2); | ||
| 58 | _ = try props_reader.readAll(props.props_s2); | ||
| 59 | |||
| 60 | // Process DerivedNumericType.txt | ||
| 61 | const num_bytes = @embedFile("numeric"); | ||
| 62 | var num_fbs = std.io.fixedBufferStream(num_bytes); | ||
| 63 | var num_reader = num_fbs.reader(); | ||
| 64 | |||
| 65 | const num_stage_1_len: u16 = try num_reader.readInt(u16, endian); | ||
| 66 | props.num_s1 = try allocator.alloc(u16, num_stage_1_len); | ||
| 67 | errdefer allocator.free(props.num_s1); | ||
| 68 | for (0..num_stage_1_len) |i| props.num_s1[i] = try num_reader.readInt(u16, endian); | ||
| 69 | |||
| 70 | const num_stage_2_len: u16 = try num_reader.readInt(u16, endian); | ||
| 71 | props.num_s2 = try allocator.alloc(u8, num_stage_2_len); | ||
| 72 | errdefer allocator.free(props.num_s2); | ||
| 73 | _ = try num_reader.readAll(props.num_s2); | ||
| 74 | } | ||
| 75 | 25 | ||
| 76 | pub fn deinit(self: *const Properties, allocator: Allocator) void { | 26 | const Properties = @This(); |
| 77 | allocator.free(self.core_s1); | ||
| 78 | allocator.free(self.core_s2); | ||
| 79 | allocator.free(self.props_s1); | ||
| 80 | allocator.free(self.props_s2); | ||
| 81 | allocator.free(self.num_s1); | ||
| 82 | allocator.free(self.num_s2); | ||
| 83 | } | ||
| 84 | 27 | ||
| 85 | /// True if `cp` is a mathematical symbol. | 28 | /// True if `cp` is a mathematical symbol. |
| 86 | pub fn isMath(self: Properties, cp: u21) bool { | 29 | pub fn isMath(cp: u21) bool { |
| 87 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; | 30 | return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; |
| 88 | } | 31 | } |
| 89 | 32 | ||
| 90 | /// True if `cp` is an alphabetic character. | 33 | /// True if `cp` is an alphabetic character. |
| 91 | pub fn isAlphabetic(self: Properties, cp: u21) bool { | 34 | pub fn isAlphabetic(cp: u21) bool { |
| 92 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; | 35 | return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; |
| 93 | } | 36 | } |
| 94 | 37 | ||
| 95 | /// True if `cp` is a valid identifier start character. | 38 | /// True if `cp` is a valid identifier start character. |
| 96 | pub fn isIdStart(self: Properties, cp: u21) bool { | 39 | pub fn isIdStart(cp: u21) bool { |
| 97 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; | 40 | return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; |
| 98 | } | 41 | } |
| 99 | 42 | ||
| 100 | /// True if `cp` is a valid identifier continuation character. | 43 | /// True if `cp` is a valid identifier continuation character. |
| 101 | pub fn isIdContinue(self: Properties, cp: u21) bool { | 44 | pub fn isIdContinue(cp: u21) bool { |
| 102 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 8 == 8; | 45 | return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 8 == 8; |
| 103 | } | 46 | } |
| 104 | 47 | ||
| 105 | /// True if `cp` is a valid extended identifier start character. | 48 | /// True if `cp` is a valid extended identifier start character. |
| 106 | pub fn isXidStart(self: Properties, cp: u21) bool { | 49 | pub fn isXidStart(cp: u21) bool { |
| 107 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 16 == 16; | 50 | return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 16 == 16; |
| 108 | } | 51 | } |
| 109 | 52 | ||
| 110 | /// True if `cp` is a valid extended identifier continuation character. | 53 | /// True if `cp` is a valid extended identifier continuation character. |
| 111 | pub fn isXidContinue(self: Properties, cp: u21) bool { | 54 | pub fn isXidContinue(cp: u21) bool { |
| 112 | return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 32 == 32; | 55 | return properties.core_s2[properties.core_s1[cp >> 8] + (cp & 0xff)] & 32 == 32; |
| 113 | } | 56 | } |
| 114 | 57 | ||
| 115 | /// True if `cp` is a whitespace character. | 58 | /// True if `cp` is a whitespace character. |
| 116 | pub fn isWhitespace(self: Properties, cp: u21) bool { | 59 | pub fn isWhitespace(cp: u21) bool { |
| 117 | return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; | 60 | return properties.props_s2[properties.props_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; |
| 118 | } | 61 | } |
| 119 | 62 | ||
| 120 | /// True if `cp` is a hexadecimal digit. | 63 | /// True if `cp` is a hexadecimal digit. |
| 121 | pub fn isHexDigit(self: Properties, cp: u21) bool { | 64 | pub fn isHexDigit(cp: u21) bool { |
| 122 | return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; | 65 | return properties.props_s2[properties.props_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; |
| 123 | } | 66 | } |
| 124 | 67 | ||
| 125 | /// True if `cp` is a diacritic mark. | 68 | /// True if `cp` is a diacritic mark. |
| 126 | pub fn isDiacritic(self: Properties, cp: u21) bool { | 69 | pub fn isDiacritic(cp: u21) bool { |
| 127 | return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; | 70 | return properties.props_s2[properties.props_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; |
| 128 | } | 71 | } |
| 129 | 72 | ||
| 130 | /// True if `cp` is numeric. | 73 | /// True if `cp` is numeric. |
| 131 | pub fn isNumeric(self: Properties, cp: u21) bool { | 74 | pub fn isNumeric(cp: u21) bool { |
| 132 | return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; | 75 | return properties.num_s2[properties.num_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; |
| 133 | } | 76 | } |
| 134 | 77 | ||
| 135 | /// True if `cp` is a digit. | 78 | /// True if `cp` is a digit. |
| 136 | pub fn isDigit(self: Properties, cp: u21) bool { | 79 | pub fn isDigit(cp: u21) bool { |
| 137 | return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; | 80 | return properties.num_s2[properties.num_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; |
| 138 | } | 81 | } |
| 139 | 82 | ||
| 140 | /// True if `cp` is decimal. | 83 | /// True if `cp` is decimal. |
| 141 | pub fn isDecimal(self: Properties, cp: u21) bool { | 84 | pub fn isDecimal(cp: u21) bool { |
| 142 | return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; | 85 | return properties.num_s2[properties.num_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; |
| 143 | } | 86 | } |
| 144 | 87 | ||
| 145 | test "Props" { | 88 | test "Props" { |
| 146 | const self = try init(testing.allocator); | 89 | try testing.expect(Properties.isHexDigit('F')); |
| 147 | defer self.deinit(testing.allocator); | 90 | try testing.expect(Properties.isHexDigit('a')); |
| 148 | 91 | try testing.expect(Properties.isHexDigit('8')); | |
| 149 | try testing.expect(self.isHexDigit('F')); | 92 | try testing.expect(!Properties.isHexDigit('z')); |
| 150 | try testing.expect(self.isHexDigit('a')); | 93 | |
| 151 | try testing.expect(self.isHexDigit('8')); | 94 | try testing.expect(Properties.isDiacritic('\u{301}')); |
| 152 | try testing.expect(!self.isHexDigit('z')); | 95 | try testing.expect(Properties.isAlphabetic('A')); |
| 153 | 96 | try testing.expect(!Properties.isAlphabetic('3')); | |
| 154 | try testing.expect(self.isDiacritic('\u{301}')); | 97 | try testing.expect(Properties.isMath('+')); |
| 155 | try testing.expect(self.isAlphabetic('A')); | 98 | |
| 156 | try testing.expect(!self.isAlphabetic('3')); | 99 | try testing.expect(Properties.isNumeric('\u{277f}')); |
| 157 | try testing.expect(self.isMath('+')); | 100 | try testing.expect(Properties.isDigit('\u{2070}')); |
| 158 | 101 | try testing.expect(Properties.isDecimal('3')); | |
| 159 | try testing.expect(self.isNumeric('\u{277f}')); | 102 | |
| 160 | try testing.expect(self.isDigit('\u{2070}')); | 103 | try testing.expect(!Properties.isNumeric('1')); |
| 161 | try testing.expect(self.isDecimal('3')); | 104 | try testing.expect(!Properties.isDigit('2')); |
| 162 | 105 | try testing.expect(!Properties.isDecimal('g')); | |
| 163 | try testing.expect(!self.isNumeric('1')); | ||
| 164 | try testing.expect(!self.isDigit('2')); | ||
| 165 | try testing.expect(!self.isDecimal('g')); | ||
| 166 | } | ||
| 167 | |||
| 168 | fn testAllocator(allocator: Allocator) !void { | ||
| 169 | var prop = try Properties.init(allocator); | ||
| 170 | prop.deinit(allocator); | ||
| 171 | } | ||
| 172 | |||
| 173 | test "Allocation failure" { | ||
| 174 | try testing.checkAllAllocationFailures(testing.allocator, testAllocator, .{}); | ||
| 175 | } | 106 | } |
| 176 | 107 | ||
| 177 | const std = @import("std"); | 108 | const std = @import("std"); |
diff --git a/src/Scripts.zig b/src/Scripts.zig index 719b01f..4938318 100644 --- a/src/Scripts.zig +++ b/src/Scripts.zig | |||
| @@ -1,8 +1,18 @@ | |||
| 1 | //! Scripts Module | 1 | //! Scripts Module |
| 2 | const Data = struct { | ||
| 3 | s1: []const u16 = undefined, | ||
| 4 | s2: []const u8 = undefined, | ||
| 5 | s3: []const u8 = undefined, | ||
| 6 | }; | ||
| 2 | 7 | ||
| 3 | s1: []u16 = undefined, | 8 | const scripts = scripts: { |
| 4 | s2: []u8 = undefined, | 9 | const data = @import("script"); |
| 5 | s3: []u8 = undefined, | 10 | break :scripts Data{ |
| 11 | .s1 = &data.s1, | ||
| 12 | .s2 = &data.s2, | ||
| 13 | .s3 = &data.s3, | ||
| 14 | }; | ||
| 15 | }; | ||
| 6 | 16 | ||
| 7 | /// Scripts enum | 17 | /// Scripts enum |
| 8 | pub const Script = enum { | 18 | pub const Script = enum { |
| @@ -178,76 +188,20 @@ pub const Script = enum { | |||
| 178 | Yi, | 188 | Yi, |
| 179 | Zanabazar_Square, | 189 | Zanabazar_Square, |
| 180 | }; | 190 | }; |
| 181 | const Scripts = @This(); | ||
| 182 | |||
| 183 | pub fn init(allocator: Allocator) Allocator.Error!Scripts { | ||
| 184 | var scripts = Scripts{}; | ||
| 185 | try scripts.setup(allocator); | ||
| 186 | return scripts; | ||
| 187 | } | ||
| 188 | |||
| 189 | pub fn setup(scripts: *Scripts, allocator: Allocator) Allocator.Error!void { | ||
| 190 | scripts.setupInner(allocator) catch |err| { | ||
| 191 | switch (err) { | ||
| 192 | error.OutOfMemory => |e| return e, | ||
| 193 | else => unreachable, | ||
| 194 | } | ||
| 195 | }; | ||
| 196 | } | ||
| 197 | |||
| 198 | inline fn setupInner(scripts: *Scripts, allocator: mem.Allocator) !void { | ||
| 199 | const in_bytes = @embedFile("scripts"); | ||
| 200 | var in_fbs = std.io.fixedBufferStream(in_bytes); | ||
| 201 | var reader = in_fbs.reader(); | ||
| 202 | |||
| 203 | const endian = builtin.cpu.arch.endian(); | ||
| 204 | |||
| 205 | const s1_len: u16 = try reader.readInt(u16, endian); | ||
| 206 | scripts.s1 = try allocator.alloc(u16, s1_len); | ||
| 207 | errdefer allocator.free(scripts.s1); | ||
| 208 | for (0..s1_len) |i| scripts.s1[i] = try reader.readInt(u16, endian); | ||
| 209 | |||
| 210 | const s2_len: u16 = try reader.readInt(u16, endian); | ||
| 211 | scripts.s2 = try allocator.alloc(u8, s2_len); | ||
| 212 | errdefer allocator.free(scripts.s2); | ||
| 213 | _ = try reader.readAll(scripts.s2); | ||
| 214 | |||
| 215 | const s3_len: u16 = try reader.readInt(u8, endian); | ||
| 216 | scripts.s3 = try allocator.alloc(u8, s3_len); | ||
| 217 | errdefer allocator.free(scripts.s3); | ||
| 218 | _ = try reader.readAll(scripts.s3); | ||
| 219 | } | ||
| 220 | |||
| 221 | pub fn deinit(self: *const Scripts, allocator: mem.Allocator) void { | ||
| 222 | allocator.free(self.s1); | ||
| 223 | allocator.free(self.s2); | ||
| 224 | allocator.free(self.s3); | ||
| 225 | } | ||
| 226 | 191 | ||
| 227 | /// Lookup the Script type for `cp`. | 192 | /// Lookup the Script type for `cp`. |
| 228 | pub fn script(self: Scripts, cp: u21) ?Script { | 193 | pub fn script(cp: u21) ?Script { |
| 229 | const byte = self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]; | 194 | const byte = scripts.s3[scripts.s2[scripts.s1[cp >> 8] + (cp & 0xff)]]; |
| 230 | if (byte == 0) return null; | 195 | if (byte == 0) return null; |
| 231 | return @enumFromInt(byte); | 196 | return @enumFromInt(byte); |
| 232 | } | 197 | } |
| 233 | 198 | ||
| 234 | test "script" { | 199 | test "script" { |
| 235 | const self = try init(std.testing.allocator); | 200 | try testing.expectEqual(Script.Latin, script('A').?); |
| 236 | defer self.deinit(std.testing.allocator); | 201 | // try testing.expectEqual(Script.Deseret, script('𐐌').?); |
| 237 | try testing.expectEqual(Script.Latin, self.script('A').?); | ||
| 238 | } | ||
| 239 | |||
| 240 | fn testAllocator(allocator: Allocator) !void { | ||
| 241 | var prop = try Scripts.init(allocator); | ||
| 242 | prop.deinit(allocator); | ||
| 243 | } | ||
| 244 | |||
| 245 | test "Allocation failure" { | ||
| 246 | try testing.checkAllAllocationFailures(testing.allocator, testAllocator, .{}); | ||
| 247 | } | 202 | } |
| 248 | 203 | ||
| 249 | const std = @import("std"); | 204 | const std = @import("std"); |
| 250 | const builtin = @import("builtin"); | 205 | const builtin = @import("builtin"); |
| 251 | const mem = std.mem; | 206 | const unicode = std.unicode; |
| 252 | const Allocator = mem.Allocator; | ||
| 253 | const testing = std.testing; | 207 | const testing = std.testing; |