summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Jose Colon Rodriguez2024-03-28 10:06:00 -0400
committerGravatar Jose Colon Rodriguez2024-03-28 10:06:00 -0400
commit74be85ac145cc6de5d03348e07be8d982c2211cb (patch)
treeb3b5f2080235e5cf73d2be080fb70583567dfb2b
parentScriptsData and made all Datas const (diff)
downloadzg-74be85ac145cc6de5d03348e07be8d982c2211cb.tar.gz
zg-74be85ac145cc6de5d03348e07be8d982c2211cb.tar.xz
zg-74be85ac145cc6de5d03348e07be8d982c2211cb.zip
PropsData and errdefers for init fns
-rw-r--r--build.zig33
-rw-r--r--codegen/core_props.zig139
-rw-r--r--codegen/props.zig136
-rw-r--r--src/CanonData.zig10
-rw-r--r--src/CombiningData.zig2
-rw-r--r--src/CompatData.zig1
-rw-r--r--src/FoldData.zig8
-rw-r--r--src/GenCatData.zig3
-rw-r--r--src/GraphemeData.zig3
-rw-r--r--src/HangulData.zig2
-rw-r--r--src/NormData.zig31
-rw-r--r--src/NormPropsData.zig2
-rw-r--r--src/NumericData.zig10
-rw-r--r--src/PropsData.zig123
-rw-r--r--src/ScriptsData.zig3
-rw-r--r--src/WidthData.zig3
16 files changed, 485 insertions, 24 deletions
diff --git a/build.zig b/build.zig
index 58c3f21..a24181a 100644
--- a/build.zig
+++ b/build.zig
@@ -146,6 +146,24 @@ pub fn build(b: *std.Build) void {
146 const run_scripts_gen_exe = b.addRunArtifact(scripts_gen_exe); 146 const run_scripts_gen_exe = b.addRunArtifact(scripts_gen_exe);
147 const scripts_gen_out = run_scripts_gen_exe.addOutputFileArg("scripts.bin.z"); 147 const scripts_gen_out = run_scripts_gen_exe.addOutputFileArg("scripts.bin.z");
148 148
149 const core_gen_exe = b.addExecutable(.{
150 .name = "core",
151 .root_source_file = .{ .path = "codegen/core_props.zig" },
152 .target = b.host,
153 .optimize = .Debug,
154 });
155 const run_core_gen_exe = b.addRunArtifact(core_gen_exe);
156 const core_gen_out = run_core_gen_exe.addOutputFileArg("core_props.bin.z");
157
158 const props_gen_exe = b.addExecutable(.{
159 .name = "props",
160 .root_source_file = .{ .path = "codegen/props.zig" },
161 .target = b.host,
162 .optimize = .Debug,
163 });
164 const run_props_gen_exe = b.addRunArtifact(props_gen_exe);
165 const props_gen_out = run_props_gen_exe.addOutputFileArg("props.bin.z");
166
149 // Modules we provide 167 // Modules we provide
150 // Code points 168 // Code points
151 const code_point = b.addModule("code_point", .{ 169 const code_point = b.addModule("code_point", .{
@@ -304,9 +322,18 @@ pub fn build(b: *std.Build) void {
304 }); 322 });
305 scripts_data.addAnonymousImport("scripts", .{ .root_source_file = scripts_gen_out }); 323 scripts_data.addAnonymousImport("scripts", .{ .root_source_file = scripts_gen_out });
306 324
325 // Properties
326 const props_data = b.addModule("ScriptsData", .{
327 .root_source_file = .{ .path = "src/PropsData.zig" },
328 .target = target,
329 .optimize = optimize,
330 });
331 props_data.addAnonymousImport("core_props", .{ .root_source_file = core_gen_out });
332 props_data.addAnonymousImport("props", .{ .root_source_file = props_gen_out });
333
307 // Tests 334 // Tests
308 const exe_unit_tests = b.addTest(.{ 335 const exe_unit_tests = b.addTest(.{
309 .root_source_file = .{ .path = "src/ScriptsData.zig" }, 336 .root_source_file = .{ .path = "src/PropsData.zig" },
310 .target = target, 337 .target = target,
311 .optimize = optimize, 338 .optimize = optimize,
312 }); 339 });
@@ -324,7 +351,9 @@ pub fn build(b: *std.Build) void {
324 // exe_unit_tests.root_module.addAnonymousImport("case_prop", .{ .root_source_file = case_prop_gen_out }); 351 // exe_unit_tests.root_module.addAnonymousImport("case_prop", .{ .root_source_file = case_prop_gen_out });
325 // exe_unit_tests.root_module.addAnonymousImport("upper", .{ .root_source_file = upper_gen_out }); 352 // exe_unit_tests.root_module.addAnonymousImport("upper", .{ .root_source_file = upper_gen_out });
326 // exe_unit_tests.root_module.addAnonymousImport("lower", .{ .root_source_file = lower_gen_out }); 353 // exe_unit_tests.root_module.addAnonymousImport("lower", .{ .root_source_file = lower_gen_out });
327 exe_unit_tests.root_module.addAnonymousImport("scripts", .{ .root_source_file = scripts_gen_out }); 354 // exe_unit_tests.root_module.addAnonymousImport("scripts", .{ .root_source_file = scripts_gen_out });
355 exe_unit_tests.root_module.addAnonymousImport("core_props", .{ .root_source_file = core_gen_out });
356 exe_unit_tests.root_module.addAnonymousImport("props", .{ .root_source_file = props_gen_out });
328 // exe_unit_tests.filter = "nfd !ASCII"; 357 // exe_unit_tests.filter = "nfd !ASCII";
329 358
330 const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests); 359 const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests);
diff --git a/codegen/core_props.zig b/codegen/core_props.zig
new file mode 100644
index 0000000..1f46f9e
--- /dev/null
+++ b/codegen/core_props.zig
@@ -0,0 +1,139 @@
1const std = @import("std");
2const builtin = @import("builtin");
3const mem = std.mem;
4
/// Number of code points covered by one stage-2 block.
const block_size = 256;
/// The property flag bytes of `block_size` consecutive code points.
const Block = [block_size]u8;

/// Hash-map context that keys a map on the full contents of a `Block`.
const BlockContext = struct {
    /// Feeds every element of `key` into a Wyhash hasher seeded with 0.
    pub fn hash(_: BlockContext, key: Block) u64 {
        var wyhash = std.hash.Wyhash.init(0);
        std.hash.autoHashStrat(&wyhash, key, .DeepRecursive);
        return wyhash.final();
    }

    /// Two blocks are equal when all of their bytes are equal.
    pub fn eql(_: BlockContext, lhs: Block, rhs: Block) bool {
        return std.mem.eql(u8, &lhs, &rhs);
    }
};

/// Map from a block's contents to a `u16` value associated with that block.
const BlockMap = std.HashMap(
    Block,
    u16,
    BlockContext,
    std.hash_map.default_max_load_percentage,
);
24
/// Generates the compressed two-stage lookup table for the core properties
/// listed in `data/unicode/DerivedCoreProperties.txt`.
///
/// Each code point gets one flag byte (Math = 1, Alphabetic = 2, ID_Start = 4,
/// ID_Continue = 8, XID_Start = 16, XID_Continue = 32). Identical 256-entry
/// blocks are stored once in stage 2; stage 1 holds, per block, the offset of
/// that block inside stage 2.
///
/// Output (deflate-compressed, written to the path given as first argument):
///   u16 stage-1 length, stage-1 entries (u16 each),
///   u16 stage-2 length, stage-2 bytes.
/// Integers use the byte order of the machine running this generator.
/// NOTE(review): src/PropsData.zig reads them with the target's byte order;
/// confirm whether cross-endian builds need a fixed order on both sides.
///
/// Errors: any file, parse, allocation or write failure is returned; a
/// missing output-path argument panics.
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // Property name -> flag bit. Properties not listed here are ignored.
    const Flag = struct { name: []const u8, bit: u8 };
    const flags = [_]Flag{
        .{ .name = "Math", .bit = 1 },
        .{ .name = "Alphabetic", .bit = 2 },
        .{ .name = "ID_Start", .bit = 4 },
        .{ .name = "ID_Continue", .bit = 8 },
        .{ .name = "XID_Start", .bit = 16 },
        .{ .name = "XID_Continue", .bit = 32 },
    };

    // Code point -> OR of the flag bits of every property it has.
    var flat_map = std.AutoHashMap(u21, u8).init(allocator);
    defer flat_map.deinit();

    var line_buf: [4096]u8 = undefined;

    // Process DerivedCoreProperties.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |raw_line| {
        // Drop the trailing comment first, then surrounding whitespace, so
        // that comment-only lines, blank lines and the "\r" left behind by
        // CRLF line endings are skipped instead of reaching parseInt.
        const uncommented = if (mem.indexOfScalar(u8, raw_line, '#')) |octo| raw_line[0..octo] else raw_line;
        const line = mem.trim(u8, uncommented, " \t\r");
        if (line.len == 0) continue;

        var field_iter = mem.tokenizeAny(u8, line, "; ");

        // Field 0: a single code point ("0041") or a range ("0041..005A").
        const range_field = field_iter.next() orelse continue;
        const dots = mem.indexOf(u8, range_field, "..");
        const first = try std.fmt.parseInt(u21, if (dots) |d| range_field[0..d] else range_field, 16);
        const last = if (dots) |d| try std.fmt.parseInt(u21, range_field[d + 2 ..], 16) else first;

        // Field 1: the property name.
        const prop_field = field_iter.next() orelse continue;
        const bit: u8 = for (flags) |flag| {
            if (mem.eql(u8, prop_field, flag.name)) break flag.bit;
        } else continue :lines;

        for (first..@as(usize, last) + 1) |cp| {
            const gop = try flat_map.getOrPut(@intCast(cp));
            if (!gop.found_existing) gop.value_ptr.* = 0;
            gop.value_ptr.* |= bit;
        }
    }

    // Block contents -> offset of that block's first byte in stage 2.
    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();

    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();

    var stage2 = std.ArrayList(u8).init(allocator);
    defer stage2.deinit();

    // The code point space divides evenly into blocks, so there is never a
    // partially filled trailing block.
    comptime std.debug.assert(0x110000 % block_size == 0);

    for (0..0x110000 / block_size) |block_index| {
        var block: Block = undefined;
        for (&block, 0..) |*slot, offset| {
            const cp: u21 = @intCast(block_index * block_size + offset);
            slot.* = flat_map.get(cp) orelse 0;
        }

        // Store each distinct block once; repeated blocks reuse its offset.
        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }

        try stage1.append(gop.value_ptr.*);
    }

    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    const compressor = std.compress.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression });
    defer out_comp.deinit();
    const writer = out_comp.writer();

    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |offset| try writer.writeInt(u16, offset, endian);

    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    try writer.writeAll(stage2.items);

    try out_comp.flush();
}
diff --git a/codegen/props.zig b/codegen/props.zig
new file mode 100644
index 0000000..57a205e
--- /dev/null
+++ b/codegen/props.zig
@@ -0,0 +1,136 @@
1const std = @import("std");
2const builtin = @import("builtin");
3const mem = std.mem;
4
/// Number of code points covered by one stage-2 block.
const block_size = 256;
/// The property flag bytes of `block_size` consecutive code points.
const Block = [block_size]u8;

/// Hash-map context that keys a map on the full contents of a `Block`.
const BlockContext = struct {
    /// Feeds every element of `key` into a Wyhash hasher seeded with 0.
    pub fn hash(_: BlockContext, key: Block) u64 {
        var wyhash = std.hash.Wyhash.init(0);
        std.hash.autoHashStrat(&wyhash, key, .DeepRecursive);
        return wyhash.final();
    }

    /// Two blocks are equal when all of their bytes are equal.
    pub fn eql(_: BlockContext, lhs: Block, rhs: Block) bool {
        return std.mem.eql(u8, &lhs, &rhs);
    }
};

/// Map from a block's contents to a `u16` value associated with that block.
const BlockMap = std.HashMap(
    Block,
    u16,
    BlockContext,
    std.hash_map.default_max_load_percentage,
);
24
/// Generates the compressed two-stage lookup table for the properties
/// listed in `data/unicode/PropList.txt`.
///
/// Each code point gets one flag byte (White_Space = 1, Hex_Digit = 2,
/// Diacritic = 4). Identical 256-entry blocks are stored once in stage 2;
/// stage 1 holds, per block, the offset of that block inside stage 2.
///
/// Output (deflate-compressed, written to the path given as first argument):
///   u16 stage-1 length, stage-1 entries (u16 each),
///   u16 stage-2 length, stage-2 bytes.
/// Integers use the byte order of the machine running this generator.
/// NOTE(review): src/PropsData.zig reads them with the target's byte order;
/// confirm whether cross-endian builds need a fixed order on both sides.
///
/// Errors: any file, parse, allocation or write failure is returned; a
/// missing output-path argument panics.
pub fn main() !void {
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // Property name -> flag bit. Properties not listed here are ignored.
    const Flag = struct { name: []const u8, bit: u8 };
    const flags = [_]Flag{
        .{ .name = "White_Space", .bit = 1 },
        .{ .name = "Hex_Digit", .bit = 2 },
        .{ .name = "Diacritic", .bit = 4 },
    };

    // Code point -> OR of the flag bits of every property it has.
    var flat_map = std.AutoHashMap(u21, u8).init(allocator);
    defer flat_map.deinit();

    var line_buf: [4096]u8 = undefined;

    // Process PropList.txt
    var in_file = try std.fs.cwd().openFile("data/unicode/PropList.txt", .{});
    defer in_file.close();
    var in_buf = std.io.bufferedReader(in_file.reader());
    const in_reader = in_buf.reader();

    lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |raw_line| {
        // Drop the trailing comment first, then surrounding whitespace, so
        // that comment-only lines, blank lines and the "\r" left behind by
        // CRLF line endings are skipped instead of reaching parseInt.
        const uncommented = if (mem.indexOfScalar(u8, raw_line, '#')) |octo| raw_line[0..octo] else raw_line;
        const line = mem.trim(u8, uncommented, " \t\r");
        if (line.len == 0) continue;

        var field_iter = mem.tokenizeAny(u8, line, "; ");

        // Field 0: a single code point ("0041") or a range ("0041..005A").
        const range_field = field_iter.next() orelse continue;
        const dots = mem.indexOf(u8, range_field, "..");
        const first = try std.fmt.parseInt(u21, if (dots) |d| range_field[0..d] else range_field, 16);
        const last = if (dots) |d| try std.fmt.parseInt(u21, range_field[d + 2 ..], 16) else first;

        // Field 1: the PropList property name.
        const prop_field = field_iter.next() orelse continue;
        const bit: u8 = for (flags) |flag| {
            if (mem.eql(u8, prop_field, flag.name)) break flag.bit;
        } else continue :lines;

        for (first..@as(usize, last) + 1) |cp| {
            const gop = try flat_map.getOrPut(@intCast(cp));
            if (!gop.found_existing) gop.value_ptr.* = 0;
            gop.value_ptr.* |= bit;
        }
    }

    // Block contents -> offset of that block's first byte in stage 2.
    var blocks_map = BlockMap.init(allocator);
    defer blocks_map.deinit();

    var stage1 = std.ArrayList(u16).init(allocator);
    defer stage1.deinit();

    var stage2 = std.ArrayList(u8).init(allocator);
    defer stage2.deinit();

    // The code point space divides evenly into blocks, so there is never a
    // partially filled trailing block.
    comptime std.debug.assert(0x110000 % block_size == 0);

    for (0..0x110000 / block_size) |block_index| {
        var block: Block = undefined;
        for (&block, 0..) |*slot, offset| {
            const cp: u21 = @intCast(block_index * block_size + offset);
            slot.* = flat_map.get(cp) orelse 0;
        }

        // Store each distinct block once; repeated blocks reuse its offset.
        const gop = try blocks_map.getOrPut(block);
        if (!gop.found_existing) {
            gop.value_ptr.* = @intCast(stage2.items.len);
            try stage2.appendSlice(&block);
        }

        try stage1.append(gop.value_ptr.*);
    }

    var args_iter = try std.process.argsWithAllocator(allocator);
    defer args_iter.deinit();
    _ = args_iter.skip();
    const output_path = args_iter.next() orelse @panic("No output file arg!");

    const compressor = std.compress.deflate.compressor;
    var out_file = try std.fs.cwd().createFile(output_path, .{});
    defer out_file.close();
    var out_comp = try compressor(allocator, out_file.writer(), .{ .level = .best_compression });
    defer out_comp.deinit();
    const writer = out_comp.writer();

    const endian = builtin.cpu.arch.endian();
    try writer.writeInt(u16, @intCast(stage1.items.len), endian);
    for (stage1.items) |offset| try writer.writeInt(u16, offset, endian);

    try writer.writeInt(u16, @intCast(stage2.items.len), endian);
    try writer.writeAll(stage2.items);

    try out_comp.flush();
}
diff --git a/src/CanonData.zig b/src/CanonData.zig
index 9f1deb8..64d5555 100644
--- a/src/CanonData.zig
+++ b/src/CanonData.zig
@@ -24,6 +24,13 @@ pub fn init(allocator: mem.Allocator) !Self {
24 .nfd = try allocator.alloc([]u21, 0x110000), 24 .nfd = try allocator.alloc([]u21, 0x110000),
25 }; 25 };
26 26
27 var slices: usize = 0;
28 errdefer {
29 self.nfc.deinit();
30 for (self.nfd[0..slices]) |slice| self.allocator.free(slice);
31 self.allocator.free(self.nfd);
32 }
33
27 @memset(self.nfd, &.{}); 34 @memset(self.nfd, &.{});
28 35
29 while (true) { 36 while (true) {
@@ -31,6 +38,7 @@ pub fn init(allocator: mem.Allocator) !Self {
31 if (len == 0) break; 38 if (len == 0) break;
32 const cp = try reader.readInt(u24, endian); 39 const cp = try reader.readInt(u24, endian);
33 self.nfd[cp] = try allocator.alloc(u21, len - 1); 40 self.nfd[cp] = try allocator.alloc(u21, len - 1);
41 slices += 1;
34 for (0..len - 1) |i| { 42 for (0..len - 1) |i| {
35 self.nfd[cp][i] = @intCast(try reader.readInt(u24, endian)); 43 self.nfd[cp][i] = @intCast(try reader.readInt(u24, endian));
36 } 44 }
@@ -42,7 +50,7 @@ pub fn init(allocator: mem.Allocator) !Self {
42 return self; 50 return self;
43} 51}
44 52
45pub fn deinit(self: *const Self) void { 53pub fn deinit(self: *Self) void {
46 self.nfc.deinit(); 54 self.nfc.deinit();
47 for (self.nfd) |slice| self.allocator.free(slice); 55 for (self.nfd) |slice| self.allocator.free(slice);
48 self.allocator.free(self.nfd); 56 self.allocator.free(self.nfd);
diff --git a/src/CombiningData.zig b/src/CombiningData.zig
index c67638c..a40cbde 100644
--- a/src/CombiningData.zig
+++ b/src/CombiningData.zig
@@ -23,10 +23,12 @@ pub fn init(allocator: mem.Allocator) !Self {
23 23
24 const stage_1_len: u16 = try reader.readInt(u16, endian); 24 const stage_1_len: u16 = try reader.readInt(u16, endian);
25 self.s1 = try allocator.alloc(u16, stage_1_len); 25 self.s1 = try allocator.alloc(u16, stage_1_len);
26 errdefer allocator.free(self.s1);
26 for (0..stage_1_len) |i| self.s1[i] = try reader.readInt(u16, endian); 27 for (0..stage_1_len) |i| self.s1[i] = try reader.readInt(u16, endian);
27 28
28 const stage_2_len: u16 = try reader.readInt(u16, endian); 29 const stage_2_len: u16 = try reader.readInt(u16, endian);
29 self.s2 = try allocator.alloc(u8, stage_2_len); 30 self.s2 = try allocator.alloc(u8, stage_2_len);
31 errdefer allocator.free(self.s2);
30 _ = try reader.readAll(self.s2); 32 _ = try reader.readAll(self.s2);
31 33
32 return self; 34 return self;
diff --git a/src/CompatData.zig b/src/CompatData.zig
index 67c43e6..a931cb3 100644
--- a/src/CompatData.zig
+++ b/src/CompatData.zig
@@ -21,6 +21,7 @@ pub fn init(allocator: mem.Allocator) !Self {
21 .allocator = allocator, 21 .allocator = allocator,
22 .nfkd = try allocator.alloc([]u21, 0x110000), 22 .nfkd = try allocator.alloc([]u21, 0x110000),
23 }; 23 };
24 errdefer self.deinit();
24 25
25 @memset(self.nfkd, &.{}); 26 @memset(self.nfkd, &.{});
26 27
diff --git a/src/FoldData.zig b/src/FoldData.zig
index e387447..a06eefe 100644
--- a/src/FoldData.zig
+++ b/src/FoldData.zig
@@ -24,6 +24,13 @@ pub fn init(allocator: mem.Allocator) !Self {
24 .cwcf = try allocator.alloc(bool, 0x110000), 24 .cwcf = try allocator.alloc(bool, 0x110000),
25 }; 25 };
26 26
27 var slices: usize = 0;
28 errdefer {
29 for (self.fold[0..slices]) |slice| self.allocator.free(slice);
30 self.allocator.free(self.fold);
31 self.allocator.free(self.cwcf);
32 }
33
27 @memset(self.fold, &.{}); 34 @memset(self.fold, &.{});
28 @memset(self.cwcf, false); 35 @memset(self.cwcf, false);
29 36
@@ -32,6 +39,7 @@ pub fn init(allocator: mem.Allocator) !Self {
32 if (len == 0) break; 39 if (len == 0) break;
33 const cp = try reader.readInt(u24, endian); 40 const cp = try reader.readInt(u24, endian);
34 self.fold[cp >> 1] = try allocator.alloc(u21, len - 1); 41 self.fold[cp >> 1] = try allocator.alloc(u21, len - 1);
42 slices += 1;
35 for (0..len - 1) |i| { 43 for (0..len - 1) |i| {
36 self.fold[cp >> 1][i] = @intCast(try reader.readInt(u24, endian)); 44 self.fold[cp >> 1][i] = @intCast(try reader.readInt(u24, endian));
37 } 45 }
diff --git a/src/GenCatData.zig b/src/GenCatData.zig
index 37ae037..12501bf 100644
--- a/src/GenCatData.zig
+++ b/src/GenCatData.zig
@@ -58,14 +58,17 @@ pub fn init(allocator: mem.Allocator) !Self {
58 58
59 const s1_len: u16 = try reader.readInt(u16, endian); 59 const s1_len: u16 = try reader.readInt(u16, endian);
60 self.s1 = try allocator.alloc(u16, s1_len); 60 self.s1 = try allocator.alloc(u16, s1_len);
61 errdefer allocator.free(self.s1);
61 for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian); 62 for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian);
62 63
63 const s2_len: u16 = try reader.readInt(u16, endian); 64 const s2_len: u16 = try reader.readInt(u16, endian);
64 self.s2 = try allocator.alloc(u5, s2_len); 65 self.s2 = try allocator.alloc(u5, s2_len);
66 errdefer allocator.free(self.s2);
65 for (0..s2_len) |i| self.s2[i] = @intCast(try reader.readInt(u8, endian)); 67 for (0..s2_len) |i| self.s2[i] = @intCast(try reader.readInt(u8, endian));
66 68
67 const s3_len: u16 = try reader.readInt(u8, endian); 69 const s3_len: u16 = try reader.readInt(u8, endian);
68 self.s3 = try allocator.alloc(u5, s3_len); 70 self.s3 = try allocator.alloc(u5, s3_len);
71 errdefer allocator.free(self.s3);
69 for (0..s3_len) |i| self.s3[i] = @intCast(try reader.readInt(u8, endian)); 72 for (0..s3_len) |i| self.s3[i] = @intCast(try reader.readInt(u8, endian));
70 73
71 return self; 74 return self;
diff --git a/src/GraphemeData.zig b/src/GraphemeData.zig
index 971929a..500ffea 100644
--- a/src/GraphemeData.zig
+++ b/src/GraphemeData.zig
@@ -51,14 +51,17 @@ pub fn init(allocator: mem.Allocator) !Self {
51 51
52 const s1_len: u16 = try reader.readInt(u16, endian); 52 const s1_len: u16 = try reader.readInt(u16, endian);
53 self.s1 = try allocator.alloc(u16, s1_len); 53 self.s1 = try allocator.alloc(u16, s1_len);
54 errdefer allocator.free(self.s1);
54 for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian); 55 for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian);
55 56
56 const s2_len: u16 = try reader.readInt(u16, endian); 57 const s2_len: u16 = try reader.readInt(u16, endian);
57 self.s2 = try allocator.alloc(u16, s2_len); 58 self.s2 = try allocator.alloc(u16, s2_len);
59 errdefer allocator.free(self.s2);
58 for (0..s2_len) |i| self.s2[i] = try reader.readInt(u16, endian); 60 for (0..s2_len) |i| self.s2[i] = try reader.readInt(u16, endian);
59 61
60 const s3_len: u16 = try reader.readInt(u16, endian); 62 const s3_len: u16 = try reader.readInt(u16, endian);
61 self.s3 = try allocator.alloc(u8, s3_len); 63 self.s3 = try allocator.alloc(u8, s3_len);
64 errdefer allocator.free(self.s3);
62 _ = try reader.readAll(self.s3); 65 _ = try reader.readAll(self.s3);
63 66
64 return self; 67 return self;
diff --git a/src/HangulData.zig b/src/HangulData.zig
index ec360e9..99d91c1 100644
--- a/src/HangulData.zig
+++ b/src/HangulData.zig
@@ -32,10 +32,12 @@ pub fn init(allocator: mem.Allocator) !Self {
32 32
33 const stage_1_len: u16 = try reader.readInt(u16, endian); 33 const stage_1_len: u16 = try reader.readInt(u16, endian);
34 self.s1 = try allocator.alloc(u16, stage_1_len); 34 self.s1 = try allocator.alloc(u16, stage_1_len);
35 errdefer allocator.free(self.s1);
35 for (0..stage_1_len) |i| self.s1[i] = try reader.readInt(u16, endian); 36 for (0..stage_1_len) |i| self.s1[i] = try reader.readInt(u16, endian);
36 37
37 const stage_2_len: u16 = try reader.readInt(u16, endian); 38 const stage_2_len: u16 = try reader.readInt(u16, endian);
38 self.s2 = try allocator.alloc(u3, stage_2_len); 39 self.s2 = try allocator.alloc(u3, stage_2_len);
40 errdefer allocator.free(self.s2);
39 for (0..stage_2_len) |i| self.s2[i] = @intCast(try reader.readInt(u8, endian)); 41 for (0..stage_2_len) |i| self.s2[i] = @intCast(try reader.readInt(u8, endian));
40 42
41 return self; 43 return self;
diff --git a/src/NormData.zig b/src/NormData.zig
index 413619a..7ffe679 100644
--- a/src/NormData.zig
+++ b/src/NormData.zig
@@ -8,25 +8,30 @@ const FoldData = @import("FoldData");
8const HangulData = @import("HangulData"); 8const HangulData = @import("HangulData");
9const NormPropsData = @import("NormPropsData"); 9const NormPropsData = @import("NormPropsData");
10 10
11canon_data: CanonData, 11canon_data: CanonData = undefined,
12ccc_data: CccData, 12ccc_data: CccData = undefined,
13compat_data: CompatData, 13compat_data: CompatData = undefined,
14hangul_data: HangulData, 14hangul_data: HangulData = undefined,
15normp_data: NormPropsData, 15normp_data: NormPropsData = undefined,
16 16
17const Self = @This(); 17const Self = @This();
18 18
19pub fn init(allocator: std.mem.Allocator) !Self { 19pub fn init(allocator: std.mem.Allocator) !Self {
20 return Self{ 20 var self = Self{};
21 .canon_data = try CanonData.init(allocator), 21 self.canon_data = try CanonData.init(allocator);
22 .ccc_data = try CccData.init(allocator), 22 errdefer self.canon_data.deinit();
23 .compat_data = try CompatData.init(allocator), 23 self.ccc_data = try CccData.init(allocator);
24 .hangul_data = try HangulData.init(allocator), 24 errdefer self.ccc_data.deinit();
25 .normp_data = try NormPropsData.init(allocator), 25 self.compat_data = try CompatData.init(allocator);
26 }; 26 errdefer self.compat_data.deinit();
27 self.hangul_data = try HangulData.init(allocator);
28 errdefer self.hangul_data.deinit();
29 self.normp_data = try NormPropsData.init(allocator);
30
31 return self;
27} 32}
28 33
29pub fn deinit(self: *const Self) void { 34pub fn deinit(self: *Self) void {
30 self.canon_data.deinit(); 35 self.canon_data.deinit();
31 self.ccc_data.deinit(); 36 self.ccc_data.deinit();
32 self.compat_data.deinit(); 37 self.compat_data.deinit();
diff --git a/src/NormPropsData.zig b/src/NormPropsData.zig
index 893a8d0..86d497b 100644
--- a/src/NormPropsData.zig
+++ b/src/NormPropsData.zig
@@ -23,10 +23,12 @@ pub fn init(allocator: mem.Allocator) !Self {
23 23
24 const stage_1_len: u16 = try reader.readInt(u16, endian); 24 const stage_1_len: u16 = try reader.readInt(u16, endian);
25 self.s1 = try allocator.alloc(u16, stage_1_len); 25 self.s1 = try allocator.alloc(u16, stage_1_len);
26 errdefer allocator.free(self.s1);
26 for (0..stage_1_len) |i| self.s1[i] = try reader.readInt(u16, endian); 27 for (0..stage_1_len) |i| self.s1[i] = try reader.readInt(u16, endian);
27 28
28 const stage_2_len: u16 = try reader.readInt(u16, endian); 29 const stage_2_len: u16 = try reader.readInt(u16, endian);
29 self.s2 = try allocator.alloc(u4, stage_2_len); 30 self.s2 = try allocator.alloc(u4, stage_2_len);
31 errdefer allocator.free(self.s2);
30 for (0..stage_2_len) |i| self.s2[i] = @intCast(try reader.readInt(u8, endian)); 32 for (0..stage_2_len) |i| self.s2[i] = @intCast(try reader.readInt(u8, endian));
31 33
32 return self; 34 return self;
diff --git a/src/NumericData.zig b/src/NumericData.zig
index 210d623..28e8206 100644
--- a/src/NumericData.zig
+++ b/src/NumericData.zig
@@ -24,10 +24,12 @@ pub fn init(allocator: mem.Allocator) !Self {
24 24
25 const stage_1_len: u16 = try reader.readInt(u16, endian); 25 const stage_1_len: u16 = try reader.readInt(u16, endian);
26 self.s1 = try allocator.alloc(u16, stage_1_len); 26 self.s1 = try allocator.alloc(u16, stage_1_len);
27 errdefer allocator.free(self.s1);
27 for (0..stage_1_len) |i| self.s1[i] = try reader.readInt(u16, endian); 28 for (0..stage_1_len) |i| self.s1[i] = try reader.readInt(u16, endian);
28 29
29 const stage_2_len: u16 = try reader.readInt(u16, endian); 30 const stage_2_len: u16 = try reader.readInt(u16, endian);
30 self.s2 = try allocator.alloc(u8, stage_2_len); 31 self.s2 = try allocator.alloc(u8, stage_2_len);
32 errdefer allocator.free(self.s2);
31 _ = try reader.readAll(self.s2); 33 _ = try reader.readAll(self.s2);
32 34
33 return self; 35 return self;
@@ -38,11 +40,6 @@ pub fn deinit(self: *const Self) void {
38 self.allocator.free(self.s2); 40 self.allocator.free(self.s2);
39} 41}
40 42
41/// True if `cp` is any numeric type.
42pub fn isNumber(self: Self, cp: u21) bool {
43 return self.isNumeric(cp) or self.isDigit(cp) or self.isDecimal(cp);
44}
45
46/// True if `cp` is numeric. 43/// True if `cp` is numeric.
47pub inline fn isNumeric(self: Self, cp: u21) bool { 44pub inline fn isNumeric(self: Self, cp: u21) bool {
48 return self.s2[self.s1[cp >> 8] + (cp & 0xff)] & 1 == 1; 45 return self.s2[self.s1[cp >> 8] + (cp & 0xff)] & 1 == 1;
@@ -62,13 +59,10 @@ test "isDecimal" {
62 const self = try init(testing.allocator); 59 const self = try init(testing.allocator);
63 defer self.deinit(); 60 defer self.deinit();
64 61
65 try testing.expect(self.isNumber('\u{277f}'));
66 try testing.expect(self.isNumber('3'));
67 try testing.expect(self.isNumeric('\u{277f}')); 62 try testing.expect(self.isNumeric('\u{277f}'));
68 try testing.expect(self.isDigit('\u{2070}')); 63 try testing.expect(self.isDigit('\u{2070}'));
69 try testing.expect(self.isDecimal('3')); 64 try testing.expect(self.isDecimal('3'));
70 65
71 try testing.expect(!self.isNumber('z'));
72 try testing.expect(!self.isNumeric('1')); 66 try testing.expect(!self.isNumeric('1'));
73 try testing.expect(!self.isDigit('2')); 67 try testing.expect(!self.isDigit('2'));
74 try testing.expect(!self.isDecimal('g')); 68 try testing.expect(!self.isDecimal('g'));
diff --git a/src/PropsData.zig b/src/PropsData.zig
new file mode 100644
index 0000000..252462e
--- /dev/null
+++ b/src/PropsData.zig
@@ -0,0 +1,123 @@
1const std = @import("std");
2const builtin = @import("builtin");
3const compress = std.compress;
4const mem = std.mem;
5const testing = std.testing;
6
/// Allocator that owns the four tables below; used again by `deinit`.
allocator: mem.Allocator,
/// Stage 1 of the core-properties table, indexed by `cp >> 8`; each entry is
/// the offset of that block inside `core_s2`.
core_s1: []u16 = undefined,
/// Stage 2 of the core-properties table: one flag byte per code point slot.
core_s2: []u8 = undefined,
/// Stage 1 of the PropList table, indexed by `cp >> 8`; each entry is the
/// offset of that block inside `props_s2`.
props_s1: []u16 = undefined,
/// Stage 2 of the PropList table: one flag byte per code point slot.
props_s2: []u8 = undefined,

const Self = @This();

/// The two stages of one decompressed lookup table.
const Stages = struct { s1: []u16, s2: []u8 };

/// Decompresses one embedded table and returns its two stages.
///
/// Expected layout after inflating `compressed`: u16 stage-1 length, that
/// many u16 entries, u16 stage-2 length, that many bytes; integers are read
/// with the target CPU's byte order.
///
/// Caller owns both returned slices. On error nothing stays allocated. A
/// stream that ends before the announced lengths are satisfied yields
/// `error.EndOfStream` instead of leaving part of a table undefined.
fn loadStages(allocator: mem.Allocator, compressed: []const u8) !Stages {
    const endian = builtin.cpu.arch.endian();

    var fbs = std.io.fixedBufferStream(compressed);
    var decomp = try compress.deflate.decompressor(allocator, fbs.reader(), null);
    defer decomp.deinit();
    const reader = decomp.reader();

    const s1_len: u16 = try reader.readInt(u16, endian);
    const s1 = try allocator.alloc(u16, s1_len);
    errdefer allocator.free(s1);
    for (s1) |*entry| entry.* = try reader.readInt(u16, endian);

    const s2_len: u16 = try reader.readInt(u16, endian);
    const s2 = try allocator.alloc(u8, s2_len);
    errdefer allocator.free(s2);
    // readNoEof fails on a short read; readAll would return the short count
    // and leave the rest of the buffer undefined.
    try reader.readNoEof(s2);

    return .{ .s1 = s1, .s2 = s2 };
}

/// Loads the embedded `core_props` (DerivedCoreProperties.txt) and `props`
/// (PropList.txt) tables. Release the result with `deinit`.
/// Returns an error if allocation or decompression fails or if the embedded
/// data is shorter than it announces; nothing stays allocated in that case.
pub fn init(allocator: mem.Allocator) !Self {
    // Process DerivedCoreProperties.txt
    const core = try loadStages(allocator, @embedFile("core_props"));
    errdefer {
        allocator.free(core.s1);
        allocator.free(core.s2);
    }

    // Process PropList.txt
    const props = try loadStages(allocator, @embedFile("props"));

    return Self{
        .allocator = allocator,
        .core_s1 = core.s1,
        .core_s2 = core.s2,
        .props_s1 = props.s1,
        .props_s2 = props.s2,
    };
}

/// Frees all four tables with the allocator given to `init`.
pub fn deinit(self: *const Self) void {
    self.allocator.free(self.core_s1);
    self.allocator.free(self.core_s2);
    self.allocator.free(self.props_s1);
    self.allocator.free(self.props_s2);
}

/// Flag byte of `cp` in the core-properties table.
/// Bits: 1 Math, 2 Alphabetic, 4 ID_Start, 8 ID_Continue, 16 XID_Start,
/// 32 XID_Continue.
inline fn coreFlags(self: Self, cp: u21) u8 {
    return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)];
}

/// Flag byte of `cp` in the PropList table.
/// Bits: 1 White_Space, 2 Hex_Digit, 4 Diacritic.
inline fn propFlags(self: Self, cp: u21) u8 {
    return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)];
}

/// True if `cp` is a mathematical symbol.
pub inline fn isMath(self: Self, cp: u21) bool {
    return self.coreFlags(cp) & 1 != 0;
}

/// True if `cp` is an alphabetic character.
pub inline fn isAlphabetic(self: Self, cp: u21) bool {
    return self.coreFlags(cp) & 2 != 0;
}

/// True if `cp` is a valid identifier start character.
pub inline fn isIdStart(self: Self, cp: u21) bool {
    return self.coreFlags(cp) & 4 != 0;
}

/// True if `cp` is a valid identifier continuation character.
pub inline fn isIdContinue(self: Self, cp: u21) bool {
    return self.coreFlags(cp) & 8 != 0;
}

/// True if `cp` is a valid extended identifier start character.
pub inline fn isXidStart(self: Self, cp: u21) bool {
    return self.coreFlags(cp) & 16 != 0;
}

/// True if `cp` is a valid extended identifier continuation character.
pub inline fn isXidContinue(self: Self, cp: u21) bool {
    return self.coreFlags(cp) & 32 != 0;
}

/// True if `cp` is a whitespace character.
pub inline fn isWhitespace(self: Self, cp: u21) bool {
    return self.propFlags(cp) & 1 != 0;
}

/// True if `cp` is a hexadecimal digit.
pub inline fn isHexDigit(self: Self, cp: u21) bool {
    return self.propFlags(cp) & 2 != 0;
}

/// True if `cp` is a diacritic mark.
pub inline fn isDiacritic(self: Self, cp: u21) bool {
    return self.propFlags(cp) & 4 != 0;
}

test "Props" {
    const self = try init(testing.allocator);
    defer self.deinit();

    try testing.expect(self.isHexDigit('F'));
    try testing.expect(self.isHexDigit('a'));
    try testing.expect(self.isHexDigit('8'));
    try testing.expect(!self.isHexDigit('z'));

    try testing.expect(self.isDiacritic('\u{301}'));
    try testing.expect(self.isAlphabetic('A'));
    try testing.expect(!self.isAlphabetic('3'));
    try testing.expect(self.isMath('+'));

    try testing.expect(self.isWhitespace(' '));
    try testing.expect(!self.isWhitespace('x'));

    try testing.expect(self.isIdStart('z'));
    try testing.expect(!self.isIdStart('1'));
    try testing.expect(self.isIdContinue('1'));
    try testing.expect(self.isXidStart('Z'));
    try testing.expect(!self.isXidStart('9'));
    try testing.expect(self.isXidContinue('9'));
}
diff --git a/src/ScriptsData.zig b/src/ScriptsData.zig
index ac1c46a..4e371bf 100644
--- a/src/ScriptsData.zig
+++ b/src/ScriptsData.zig
@@ -193,14 +193,17 @@ pub fn init(allocator: mem.Allocator) !Self {
193 193
194 const s1_len: u16 = try reader.readInt(u16, endian); 194 const s1_len: u16 = try reader.readInt(u16, endian);
195 self.s1 = try allocator.alloc(u16, s1_len); 195 self.s1 = try allocator.alloc(u16, s1_len);
196 errdefer allocator.free(self.s1);
196 for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian); 197 for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian);
197 198
198 const s2_len: u16 = try reader.readInt(u16, endian); 199 const s2_len: u16 = try reader.readInt(u16, endian);
199 self.s2 = try allocator.alloc(u8, s2_len); 200 self.s2 = try allocator.alloc(u8, s2_len);
201 errdefer allocator.free(self.s2);
200 _ = try reader.readAll(self.s2); 202 _ = try reader.readAll(self.s2);
201 203
202 const s3_len: u16 = try reader.readInt(u8, endian); 204 const s3_len: u16 = try reader.readInt(u8, endian);
203 self.s3 = try allocator.alloc(u8, s3_len); 205 self.s3 = try allocator.alloc(u8, s3_len);
206 errdefer allocator.free(self.s3);
204 _ = try reader.readAll(self.s3); 207 _ = try reader.readAll(self.s3);
205 208
206 return self; 209 return self;
diff --git a/src/WidthData.zig b/src/WidthData.zig
index d17f0cd..b9ef84e 100644
--- a/src/WidthData.zig
+++ b/src/WidthData.zig
@@ -27,13 +27,16 @@ pub fn init(allocator: mem.Allocator) !Self {
27 .allocator = allocator, 27 .allocator = allocator,
28 .g_data = try GraphemeData.init(allocator), 28 .g_data = try GraphemeData.init(allocator),
29 }; 29 };
30 errdefer self.g_data.deinit();
30 31
31 const stage_1_len: u16 = try reader.readInt(u16, endian); 32 const stage_1_len: u16 = try reader.readInt(u16, endian);
32 self.s1 = try allocator.alloc(u16, stage_1_len); 33 self.s1 = try allocator.alloc(u16, stage_1_len);
34 errdefer allocator.free(self.s1);
33 for (0..stage_1_len) |i| self.s1[i] = try reader.readInt(u16, endian); 35 for (0..stage_1_len) |i| self.s1[i] = try reader.readInt(u16, endian);
34 36
35 const stage_2_len: u16 = try reader.readInt(u16, endian); 37 const stage_2_len: u16 = try reader.readInt(u16, endian);
36 self.s2 = try allocator.alloc(i3, stage_2_len); 38 self.s2 = try allocator.alloc(i3, stage_2_len);
39 errdefer allocator.free(self.s2);
37 for (0..stage_2_len) |i| self.s2[i] = @intCast(try reader.readInt(i8, endian)); 40 for (0..stage_2_len) |i| self.s2[i] = @intCast(try reader.readInt(i8, endian));
38 41
39 return self; 42 return self;