summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Sam Atman, 2025-04-30 16:48:07 -0400
committer: Sam Atman, 2025-04-30 16:48:07 -0400
commit: d2d42bf3ef5490f6fdec73508c2493a666ecee41 (patch)
tree: 377794be59ece4118ca2449b705b8e7cc646abc0 /src
parent: Update README.md to new API (diff)
download: zg-d2d42bf3ef5490f6fdec73508c2493a666ecee41.tar.gz
zg-d2d42bf3ef5490f6fdec73508c2493a666ecee41.tar.xz
zg-d2d42bf3ef5490f6fdec73508c2493a666ecee41.zip
Setup variants for all allocating modules
This harmonizes the allocating modules in a couple of ways. All can now be constructed by pointer, and all treat various miscellaneous read failures as `unreachable`, which indeed they should be. The README has been updated to inform users of this option.
Diffstat (limited to 'src')
-rw-r--r-- src/CaseFolding.zig 18
-rw-r--r-- src/GeneralCategories.zig 59
-rw-r--r-- src/Graphemes.zig 32
-rw-r--r-- src/LetterCasing.zig 62
-rw-r--r-- src/Normalize.zig 39
-rw-r--r-- src/Properties.zig 98
-rw-r--r-- src/Scripts.zig 66
7 files changed, 228 insertions, 146 deletions
diff --git a/src/CaseFolding.zig b/src/CaseFolding.zig
index 162e82f..2e53bfa 100644
--- a/src/CaseFolding.zig
+++ b/src/CaseFolding.zig
@@ -11,20 +11,21 @@ owns_normalize: bool,
11 11
12const CaseFolding = @This(); 12const CaseFolding = @This();
13 13
14pub fn init(allocator: Allocator) !CaseFolding { 14pub fn init(allocator: Allocator) Allocator.Error!CaseFolding {
15 var case_fold: CaseFolding = undefined; 15 var case_fold: CaseFolding = undefined;
16 try case_fold.setup(allocator); 16 try case_fold.setup(allocator);
17 return case_fold; 17 return case_fold;
18} 18}
19 19
20pub fn initWithNormalize(allocator: Allocator, norm: Normalize) !CaseFolding { 20pub fn initWithNormalize(allocator: Allocator, norm: Normalize) Allocator.Error!CaseFolding {
21 var casefold: CaseFolding = undefined; 21 var casefold: CaseFolding = undefined;
22 try casefold.setupWithNormalize(allocator, norm); 22 try casefold.setupWithNormalize(allocator, norm);
23 return casefold; 23 return casefold;
24} 24}
25 25
26pub fn setup(casefold: *CaseFolding, allocator: Allocator) !void { 26pub fn setup(casefold: *CaseFolding, allocator: Allocator) Allocator.Error!void {
27 try casefold.setupImpl(allocator); 27 try casefold.setupImpl(allocator);
28 // Handle normalize memory separately during setup:
28 casefold.owns_normalize = false; 29 casefold.owns_normalize = false;
29 errdefer casefold.deinit(allocator); 30 errdefer casefold.deinit(allocator);
30 try casefold.normalize.setup(allocator); 31 try casefold.normalize.setup(allocator);
@@ -37,7 +38,16 @@ pub fn setupWithNormalize(casefold: *CaseFolding, allocator: Allocator, norm: No
37 casefold.owns_normalize = false; 38 casefold.owns_normalize = false;
38} 39}
39 40
40fn setupImpl(casefold: *CaseFolding, allocator: Allocator) !void { 41fn setupImpl(casefold: *CaseFolding, allocator: Allocator) Allocator.Error!void {
42 casefold.setupImplInner(allocator) catch |err| {
43 switch (err) {
44 error.OutOfMemory => |e| return e,
45 else => unreachable,
46 }
47 };
48}
49
50inline fn setupImplInner(casefold: *CaseFolding, allocator: Allocator) !void {
41 const decompressor = compress.flate.inflate.decompressor; 51 const decompressor = compress.flate.inflate.decompressor;
42 const in_bytes = @embedFile("fold"); 52 const in_bytes = @embedFile("fold");
43 var in_fbs = std.io.fixedBufferStream(in_bytes); 53 var in_fbs = std.io.fixedBufferStream(in_bytes);
diff --git a/src/GeneralCategories.zig b/src/GeneralCategories.zig
index a69f7a2..b7c82c0 100644
--- a/src/GeneralCategories.zig
+++ b/src/GeneralCategories.zig
@@ -1,7 +1,8 @@
1const std = @import("std"); 1//! General Categories
2const builtin = @import("builtin"); 2
3const compress = std.compress; 3s1: []u16 = undefined,
4const mem = std.mem; 4s2: []u5 = undefined,
5s3: []u5 = undefined,
5 6
6/// General Category 7/// General Category
7pub const Gc = enum { 8pub const Gc = enum {
@@ -37,13 +38,15 @@ pub const Gc = enum {
37 Zs, // Separator, Space 38 Zs, // Separator, Space
38}; 39};
39 40
40s1: []u16 = undefined, 41const GeneralCategories = @This();
41s2: []u5 = undefined,
42s3: []u5 = undefined,
43 42
44const Self = @This(); 43pub fn init(allocator: Allocator) Allocator.Error!GeneralCategories {
44 var gencat = GeneralCategories{};
45 try gencat.setup(allocator);
46 return gencat;
47}
45 48
46pub fn init(allocator: mem.Allocator) !Self { 49pub fn setup(self: *GeneralCategories, allocator: Allocator) Allocator.Error!void {
47 const decompressor = compress.flate.inflate.decompressor; 50 const decompressor = compress.flate.inflate.decompressor;
48 const in_bytes = @embedFile("gencat"); 51 const in_bytes = @embedFile("gencat");
49 var in_fbs = std.io.fixedBufferStream(in_bytes); 52 var in_fbs = std.io.fixedBufferStream(in_bytes);
@@ -52,39 +55,35 @@ pub fn init(allocator: mem.Allocator) !Self {
52 55
53 const endian = builtin.cpu.arch.endian(); 56 const endian = builtin.cpu.arch.endian();
54 57
55 var self = Self{}; 58 const s1_len: u16 = reader.readInt(u16, endian) catch unreachable;
56
57 const s1_len: u16 = try reader.readInt(u16, endian);
58 self.s1 = try allocator.alloc(u16, s1_len); 59 self.s1 = try allocator.alloc(u16, s1_len);
59 errdefer allocator.free(self.s1); 60 errdefer allocator.free(self.s1);
60 for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian); 61 for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian);
61 62
62 const s2_len: u16 = try reader.readInt(u16, endian); 63 const s2_len: u16 = reader.readInt(u16, endian) catch unreachable;
63 self.s2 = try allocator.alloc(u5, s2_len); 64 self.s2 = try allocator.alloc(u5, s2_len);
64 errdefer allocator.free(self.s2); 65 errdefer allocator.free(self.s2);
65 for (0..s2_len) |i| self.s2[i] = @intCast(try reader.readInt(u8, endian)); 66 for (0..s2_len) |i| self.s2[i] = @intCast(reader.readInt(u8, endian) catch unreachable);
66 67
67 const s3_len: u16 = try reader.readInt(u8, endian); 68 const s3_len: u16 = reader.readInt(u8, endian) catch unreachable;
68 self.s3 = try allocator.alloc(u5, s3_len); 69 self.s3 = try allocator.alloc(u5, s3_len);
69 errdefer allocator.free(self.s3); 70 errdefer allocator.free(self.s3);
70 for (0..s3_len) |i| self.s3[i] = @intCast(try reader.readInt(u8, endian)); 71 for (0..s3_len) |i| self.s3[i] = @intCast(reader.readInt(u8, endian) catch unreachable);
71
72 return self;
73} 72}
74 73
75pub fn deinit(self: *const Self, allocator: mem.Allocator) void { 74pub fn deinit(self: *const GeneralCategories, allocator: mem.Allocator) void {
76 allocator.free(self.s1); 75 allocator.free(self.s1);
77 allocator.free(self.s2); 76 allocator.free(self.s2);
78 allocator.free(self.s3); 77 allocator.free(self.s3);
79} 78}
80 79
81/// Lookup the General Category for `cp`. 80/// Lookup the General Category for `cp`.
82pub fn gc(self: Self, cp: u21) Gc { 81pub fn gc(self: GeneralCategories, cp: u21) Gc {
83 return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]); 82 return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]);
84} 83}
85 84
86/// True if `cp` has an C general category. 85/// True if `cp` has an C general category.
87pub fn isControl(self: Self, cp: u21) bool { 86pub fn isControl(self: GeneralCategories, cp: u21) bool {
88 return switch (self.gc(cp)) { 87 return switch (self.gc(cp)) {
89 .Cc, 88 .Cc,
90 .Cf, 89 .Cf,
@@ -97,7 +96,7 @@ pub fn isControl(self: Self, cp: u21) bool {
97} 96}
98 97
99/// True if `cp` has an L general category. 98/// True if `cp` has an L general category.
100pub fn isLetter(self: Self, cp: u21) bool { 99pub fn isLetter(self: GeneralCategories, cp: u21) bool {
101 return switch (self.gc(cp)) { 100 return switch (self.gc(cp)) {
102 .Ll, 101 .Ll,
103 .Lm, 102 .Lm,
@@ -110,7 +109,7 @@ pub fn isLetter(self: Self, cp: u21) bool {
110} 109}
111 110
112/// True if `cp` has an M general category. 111/// True if `cp` has an M general category.
113pub fn isMark(self: Self, cp: u21) bool { 112pub fn isMark(self: GeneralCategories, cp: u21) bool {
114 return switch (self.gc(cp)) { 113 return switch (self.gc(cp)) {
115 .Mc, 114 .Mc,
116 .Me, 115 .Me,
@@ -121,7 +120,7 @@ pub fn isMark(self: Self, cp: u21) bool {
121} 120}
122 121
123/// True if `cp` has an N general category. 122/// True if `cp` has an N general category.
124pub fn isNumber(self: Self, cp: u21) bool { 123pub fn isNumber(self: GeneralCategories, cp: u21) bool {
125 return switch (self.gc(cp)) { 124 return switch (self.gc(cp)) {
126 .Nd, 125 .Nd,
127 .Nl, 126 .Nl,
@@ -132,7 +131,7 @@ pub fn isNumber(self: Self, cp: u21) bool {
132} 131}
133 132
134/// True if `cp` has an P general category. 133/// True if `cp` has an P general category.
135pub fn isPunctuation(self: Self, cp: u21) bool { 134pub fn isPunctuation(self: GeneralCategories, cp: u21) bool {
136 return switch (self.gc(cp)) { 135 return switch (self.gc(cp)) {
137 .Pc, 136 .Pc,
138 .Pd, 137 .Pd,
@@ -147,7 +146,7 @@ pub fn isPunctuation(self: Self, cp: u21) bool {
147} 146}
148 147
149/// True if `cp` has an S general category. 148/// True if `cp` has an S general category.
150pub fn isSymbol(self: Self, cp: u21) bool { 149pub fn isSymbol(self: GeneralCategories, cp: u21) bool {
151 return switch (self.gc(cp)) { 150 return switch (self.gc(cp)) {
152 .Sc, 151 .Sc,
153 .Sk, 152 .Sk,
@@ -159,7 +158,7 @@ pub fn isSymbol(self: Self, cp: u21) bool {
159} 158}
160 159
161/// True if `cp` has an Z general category. 160/// True if `cp` has an Z general category.
162pub fn isSeparator(self: Self, cp: u21) bool { 161pub fn isSeparator(self: GeneralCategories, cp: u21) bool {
163 return switch (self.gc(cp)) { 162 return switch (self.gc(cp)) {
164 .Zl, 163 .Zl,
165 .Zp, 164 .Zp,
@@ -168,3 +167,9 @@ pub fn isSeparator(self: Self, cp: u21) bool {
168 else => false, 167 else => false,
169 }; 168 };
170} 169}
170
171const std = @import("std");
172const builtin = @import("builtin");
173const compress = std.compress;
174const mem = std.mem;
175const Allocator = mem.Allocator;
diff --git a/src/Graphemes.zig b/src/Graphemes.zig
index 79cd2c6..7bf328a 100644
--- a/src/Graphemes.zig
+++ b/src/Graphemes.zig
@@ -14,7 +14,13 @@ s3: []u8 = undefined,
14 14
15const Graphemes = @This(); 15const Graphemes = @This();
16 16
17pub inline fn init(allocator: mem.Allocator) mem.Allocator.Error!Graphemes { 17pub fn init(allocator: Allocator) Allocator.Error!Graphemes {
18 var graphemes = Graphemes{};
19 try graphemes.setup(allocator);
20 return graphemes;
21}
22
23pub fn setup(graphemes: *Graphemes, allocator: Allocator) Allocator.Error!void {
18 const decompressor = compress.flate.inflate.decompressor; 24 const decompressor = compress.flate.inflate.decompressor;
19 const in_bytes = @embedFile("gbp"); 25 const in_bytes = @embedFile("gbp");
20 var in_fbs = std.io.fixedBufferStream(in_bytes); 26 var in_fbs = std.io.fixedBufferStream(in_bytes);
@@ -23,27 +29,23 @@ pub inline fn init(allocator: mem.Allocator) mem.Allocator.Error!Graphemes {
23 29
24 const endian = builtin.cpu.arch.endian(); 30 const endian = builtin.cpu.arch.endian();
25 31
26 var self = Graphemes{};
27
28 const s1_len: u16 = reader.readInt(u16, endian) catch unreachable; 32 const s1_len: u16 = reader.readInt(u16, endian) catch unreachable;
29 self.s1 = try allocator.alloc(u16, s1_len); 33 graphemes.s1 = try allocator.alloc(u16, s1_len);
30 errdefer allocator.free(self.s1); 34 errdefer allocator.free(graphemes.s1);
31 for (0..s1_len) |i| self.s1[i] = reader.readInt(u16, endian) catch unreachable; 35 for (0..s1_len) |i| graphemes.s1[i] = reader.readInt(u16, endian) catch unreachable;
32 36
33 const s2_len: u16 = reader.readInt(u16, endian) catch unreachable; 37 const s2_len: u16 = reader.readInt(u16, endian) catch unreachable;
34 self.s2 = try allocator.alloc(u16, s2_len); 38 graphemes.s2 = try allocator.alloc(u16, s2_len);
35 errdefer allocator.free(self.s2); 39 errdefer allocator.free(graphemes.s2);
36 for (0..s2_len) |i| self.s2[i] = reader.readInt(u16, endian) catch unreachable; 40 for (0..s2_len) |i| graphemes.s2[i] = reader.readInt(u16, endian) catch unreachable;
37 41
38 const s3_len: u16 = reader.readInt(u16, endian) catch unreachable; 42 const s3_len: u16 = reader.readInt(u16, endian) catch unreachable;
39 self.s3 = try allocator.alloc(u8, s3_len); 43 graphemes.s3 = try allocator.alloc(u8, s3_len);
40 errdefer allocator.free(self.s3); 44 errdefer allocator.free(graphemes.s3);
41 _ = reader.readAll(self.s3) catch unreachable; 45 _ = reader.readAll(graphemes.s3) catch unreachable;
42
43 return self;
44} 46}
45 47
46pub fn deinit(graphemes: *const Graphemes, allocator: mem.Allocator) void { 48pub fn deinit(graphemes: *const Graphemes, allocator: Allocator) void {
47 allocator.free(graphemes.s1); 49 allocator.free(graphemes.s1);
48 allocator.free(graphemes.s2); 50 allocator.free(graphemes.s2);
49 allocator.free(graphemes.s3); 51 allocator.free(graphemes.s3);
diff --git a/src/LetterCasing.zig b/src/LetterCasing.zig
index 0a0acb1..a7260b8 100644
--- a/src/LetterCasing.zig
+++ b/src/LetterCasing.zig
@@ -1,25 +1,31 @@
1const std = @import("std");
2const builtin = @import("builtin");
3const compress = std.compress;
4const mem = std.mem;
5const testing = std.testing;
6const unicode = std.unicode;
7
8const CodePointIterator = @import("code_point").Iterator; 1const CodePointIterator = @import("code_point").Iterator;
9 2
10case_map: [][2]u21, 3case_map: [][2]u21 = undefined,
11prop_s1: []u16 = undefined, 4prop_s1: []u16 = undefined,
12prop_s2: []u8 = undefined, 5prop_s2: []u8 = undefined,
13 6
14const Self = @This(); 7const LetterCasing = @This();
8
9pub fn init(allocator: Allocator) Allocator.Error!LetterCasing {
10 var case = LetterCasing{};
11 try case.setup(allocator);
12 return case;
13}
15 14
16pub fn init(allocator: mem.Allocator) !Self { 15pub fn setup(case: *LetterCasing, allocator: Allocator) Allocator.Error!void {
16 case.setupInner(allocator) catch |err| {
17 switch (err) {
18 error.OutOfMemory => |e| return e,
19 else => unreachable,
20 }
21 };
22}
23
24inline fn setupInner(self: *LetterCasing, allocator: mem.Allocator) !void {
17 const decompressor = compress.flate.inflate.decompressor; 25 const decompressor = compress.flate.inflate.decompressor;
18 const endian = builtin.cpu.arch.endian(); 26 const endian = builtin.cpu.arch.endian();
19 27
20 var self = Self{ 28 self.case_map = try allocator.alloc([2]u21, 0x110000);
21 .case_map = try allocator.alloc([2]u21, 0x110000),
22 };
23 errdefer allocator.free(self.case_map); 29 errdefer allocator.free(self.case_map);
24 30
25 for (0..0x110000) |i| { 31 for (0..0x110000) |i| {
@@ -68,28 +74,26 @@ pub fn init(allocator: mem.Allocator) !Self {
68 self.prop_s2 = try allocator.alloc(u8, stage_2_len); 74 self.prop_s2 = try allocator.alloc(u8, stage_2_len);
69 errdefer allocator.free(self.prop_s2); 75 errdefer allocator.free(self.prop_s2);
70 _ = try cp_reader.readAll(self.prop_s2); 76 _ = try cp_reader.readAll(self.prop_s2);
71
72 return self;
73} 77}
74 78
75pub fn deinit(self: *const Self, allocator: mem.Allocator) void { 79pub fn deinit(self: *const LetterCasing, allocator: mem.Allocator) void {
76 allocator.free(self.case_map); 80 allocator.free(self.case_map);
77 allocator.free(self.prop_s1); 81 allocator.free(self.prop_s1);
78 allocator.free(self.prop_s2); 82 allocator.free(self.prop_s2);
79} 83}
80 84
81// Returns true if `cp` is either upper, lower, or title case. 85// Returns true if `cp` is either upper, lower, or title case.
82pub fn isCased(self: Self, cp: u21) bool { 86pub fn isCased(self: LetterCasing, cp: u21) bool {
83 return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; 87 return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
84} 88}
85 89
86// Returns true if `cp` is uppercase. 90// Returns true if `cp` is uppercase.
87pub fn isUpper(self: Self, cp: u21) bool { 91pub fn isUpper(self: LetterCasing, cp: u21) bool {
88 return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; 92 return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 2 == 2;
89} 93}
90 94
91/// Returns true if `str` is all uppercase. 95/// Returns true if `str` is all uppercase.
92pub fn isUpperStr(self: Self, str: []const u8) bool { 96pub fn isUpperStr(self: LetterCasing, str: []const u8) bool {
93 var iter = CodePointIterator{ .bytes = str }; 97 var iter = CodePointIterator{ .bytes = str };
94 98
95 return while (iter.next()) |cp| { 99 return while (iter.next()) |cp| {
@@ -107,14 +111,14 @@ test "isUpperStr" {
107} 111}
108 112
109/// Returns uppercase mapping for `cp`. 113/// Returns uppercase mapping for `cp`.
110pub fn toUpper(self: Self, cp: u21) u21 { 114pub fn toUpper(self: LetterCasing, cp: u21) u21 {
111 return self.case_map[cp][0]; 115 return self.case_map[cp][0];
112} 116}
113 117
114/// Returns a new string with all letters in uppercase. 118/// Returns a new string with all letters in uppercase.
115/// Caller must free returned bytes with `allocator`. 119/// Caller must free returned bytes with `allocator`.
116pub fn toUpperStr( 120pub fn toUpperStr(
117 self: Self, 121 self: LetterCasing,
118 allocator: mem.Allocator, 122 allocator: mem.Allocator,
119 str: []const u8, 123 str: []const u8,
120) ![]u8 { 124) ![]u8 {
@@ -142,12 +146,12 @@ test "toUpperStr" {
142} 146}
143 147
144// Returns true if `cp` is lowercase. 148// Returns true if `cp` is lowercase.
145pub fn isLower(self: Self, cp: u21) bool { 149pub fn isLower(self: LetterCasing, cp: u21) bool {
146 return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; 150 return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 1 == 1;
147} 151}
148 152
149/// Returns true if `str` is all lowercase. 153/// Returns true if `str` is all lowercase.
150pub fn isLowerStr(self: Self, str: []const u8) bool { 154pub fn isLowerStr(self: LetterCasing, str: []const u8) bool {
151 var iter = CodePointIterator{ .bytes = str }; 155 var iter = CodePointIterator{ .bytes = str };
152 156
153 return while (iter.next()) |cp| { 157 return while (iter.next()) |cp| {
@@ -165,14 +169,14 @@ test "isLowerStr" {
165} 169}
166 170
167/// Returns lowercase mapping for `cp`. 171/// Returns lowercase mapping for `cp`.
168pub fn toLower(self: Self, cp: u21) u21 { 172pub fn toLower(self: LetterCasing, cp: u21) u21 {
169 return self.case_map[cp][1]; 173 return self.case_map[cp][1];
170} 174}
171 175
172/// Returns a new string with all letters in lowercase. 176/// Returns a new string with all letters in lowercase.
173/// Caller must free returned bytes with `allocator`. 177/// Caller must free returned bytes with `allocator`.
174pub fn toLowerStr( 178pub fn toLowerStr(
175 self: Self, 179 self: LetterCasing,
176 allocator: mem.Allocator, 180 allocator: mem.Allocator,
177 str: []const u8, 181 str: []const u8,
178) ![]u8 { 182) ![]u8 {
@@ -198,3 +202,11 @@ test "toLowerStr" {
198 defer testing.allocator.free(lowered); 202 defer testing.allocator.free(lowered);
199 try testing.expectEqualStrings("hello, world 2112!", lowered); 203 try testing.expectEqualStrings("hello, world 2112!", lowered);
200} 204}
205
206const std = @import("std");
207const builtin = @import("builtin");
208const compress = std.compress;
209const mem = std.mem;
210const Allocator = std.mem.Allocator;
211const testing = std.testing;
212const unicode = std.unicode;
diff --git a/src/Normalize.zig b/src/Normalize.zig
index d8c867d..1500b4c 100644
--- a/src/Normalize.zig
+++ b/src/Normalize.zig
@@ -10,22 +10,47 @@ normp_data: NormPropsData = undefined,
10 10
11const Normalize = @This(); 11const Normalize = @This();
12 12
13pub fn init(allocator: Allocator) !Normalize { 13pub fn init(allocator: Allocator) Allocator.Error!Normalize {
14 var norm: Normalize = undefined; 14 var norm: Normalize = undefined;
15 try norm.setup(allocator); 15 try norm.setup(allocator);
16 return norm; 16 return norm;
17} 17}
18 18
19pub fn setup(self: *Normalize, allocator: Allocator) !void { 19pub fn setup(self: *Normalize, allocator: Allocator) Allocator.Error!void {
20 self.canon_data = try CanonData.init(allocator); 20 self.canon_data = CanonData.init(allocator) catch |err| {
21 switch (err) {
22 error.OutOfMemory => |e| return e,
23 else => unreachable,
24 }
25 };
21 errdefer self.canon_data.deinit(allocator); 26 errdefer self.canon_data.deinit(allocator);
22 self.ccc_data = try CccData.init(allocator); 27 self.ccc_data = CccData.init(allocator) catch |err| {
28 switch (err) {
29 error.OutOfMemory => |e| return e,
30 else => unreachable,
31 }
32 };
23 errdefer self.ccc_data.deinit(allocator); 33 errdefer self.ccc_data.deinit(allocator);
24 self.compat_data = try CompatData.init(allocator); 34 self.compat_data = CompatData.init(allocator) catch |err| {
35 switch (err) {
36 error.OutOfMemory => |e| return e,
37 else => unreachable,
38 }
39 };
25 errdefer self.compat_data.deinit(allocator); 40 errdefer self.compat_data.deinit(allocator);
26 self.hangul_data = try HangulData.init(allocator); 41 self.hangul_data = HangulData.init(allocator) catch |err| {
42 switch (err) {
43 error.OutOfMemory => |e| return e,
44 else => unreachable,
45 }
46 };
27 errdefer self.hangul_data.deinit(allocator); 47 errdefer self.hangul_data.deinit(allocator);
28 self.normp_data = try NormPropsData.init(allocator); 48 self.normp_data = NormPropsData.init(allocator) catch |err| {
49 switch (err) {
50 error.OutOfMemory => |e| return e,
51 else => unreachable,
52 }
53 };
29} 54}
30 55
31pub fn deinit(norm: *const Normalize, allocator: Allocator) void { 56pub fn deinit(norm: *const Normalize, allocator: Allocator) void {
diff --git a/src/Properties.zig b/src/Properties.zig
index 46920be..f7e57ec 100644
--- a/src/Properties.zig
+++ b/src/Properties.zig
@@ -1,8 +1,4 @@
1const std = @import("std"); 1//! Properties module
2const builtin = @import("builtin");
3const compress = std.compress;
4const mem = std.mem;
5const testing = std.testing;
6 2
7core_s1: []u16 = undefined, 3core_s1: []u16 = undefined,
8core_s2: []u8 = undefined, 4core_s2: []u8 = undefined,
@@ -11,9 +7,24 @@ props_s2: []u8 = undefined,
11num_s1: []u16 = undefined, 7num_s1: []u16 = undefined,
12num_s2: []u8 = undefined, 8num_s2: []u8 = undefined,
13 9
14const Self = @This(); 10const Properties = @This();
11
12pub fn init(allocator: Allocator) Allocator.Error!Properties {
13 var props = Properties{};
14 try props.setup(allocator);
15 return props;
16}
17
18pub fn setup(props: *Properties, allocator: Allocator) Allocator.Error!void {
19 props.setupInner(allocator) catch |err| {
20 switch (err) {
21 error.OutOfMemory => |e| return e,
22 else => unreachable,
23 }
24 };
25}
15 26
16pub fn init(allocator: mem.Allocator) !Self { 27inline fn setupInner(props: *Properties, allocator: Allocator) !void {
17 const decompressor = compress.flate.inflate.decompressor; 28 const decompressor = compress.flate.inflate.decompressor;
18 const endian = builtin.cpu.arch.endian(); 29 const endian = builtin.cpu.arch.endian();
19 30
@@ -23,17 +34,15 @@ pub fn init(allocator: mem.Allocator) !Self {
23 var core_decomp = decompressor(.raw, core_fbs.reader()); 34 var core_decomp = decompressor(.raw, core_fbs.reader());
24 var core_reader = core_decomp.reader(); 35 var core_reader = core_decomp.reader();
25 36
26 var self = Self{};
27
28 const core_stage_1_len: u16 = try core_reader.readInt(u16, endian); 37 const core_stage_1_len: u16 = try core_reader.readInt(u16, endian);
29 self.core_s1 = try allocator.alloc(u16, core_stage_1_len); 38 props.core_s1 = try allocator.alloc(u16, core_stage_1_len);
30 errdefer allocator.free(self.core_s1); 39 errdefer allocator.free(props.core_s1);
31 for (0..core_stage_1_len) |i| self.core_s1[i] = try core_reader.readInt(u16, endian); 40 for (0..core_stage_1_len) |i| props.core_s1[i] = try core_reader.readInt(u16, endian);
32 41
33 const core_stage_2_len: u16 = try core_reader.readInt(u16, endian); 42 const core_stage_2_len: u16 = try core_reader.readInt(u16, endian);
34 self.core_s2 = try allocator.alloc(u8, core_stage_2_len); 43 props.core_s2 = try allocator.alloc(u8, core_stage_2_len);
35 errdefer allocator.free(self.core_s2); 44 errdefer allocator.free(props.core_s2);
36 _ = try core_reader.readAll(self.core_s2); 45 _ = try core_reader.readAll(props.core_s2);
37 46
38 // Process PropList.txt 47 // Process PropList.txt
39 const props_bytes = @embedFile("props"); 48 const props_bytes = @embedFile("props");
@@ -42,14 +51,14 @@ pub fn init(allocator: mem.Allocator) !Self {
42 var props_reader = props_decomp.reader(); 51 var props_reader = props_decomp.reader();
43 52
44 const stage_1_len: u16 = try props_reader.readInt(u16, endian); 53 const stage_1_len: u16 = try props_reader.readInt(u16, endian);
45 self.props_s1 = try allocator.alloc(u16, stage_1_len); 54 props.props_s1 = try allocator.alloc(u16, stage_1_len);
46 errdefer allocator.free(self.props_s1); 55 errdefer allocator.free(props.props_s1);
47 for (0..stage_1_len) |i| self.props_s1[i] = try props_reader.readInt(u16, endian); 56 for (0..stage_1_len) |i| props.props_s1[i] = try props_reader.readInt(u16, endian);
48 57
49 const stage_2_len: u16 = try props_reader.readInt(u16, endian); 58 const stage_2_len: u16 = try props_reader.readInt(u16, endian);
50 self.props_s2 = try allocator.alloc(u8, stage_2_len); 59 props.props_s2 = try allocator.alloc(u8, stage_2_len);
51 errdefer allocator.free(self.props_s2); 60 errdefer allocator.free(props.props_s2);
52 _ = try props_reader.readAll(self.props_s2); 61 _ = try props_reader.readAll(props.props_s2);
53 62
54 // Process DerivedNumericType.txt 63 // Process DerivedNumericType.txt
55 const num_bytes = @embedFile("numeric"); 64 const num_bytes = @embedFile("numeric");
@@ -58,19 +67,17 @@ pub fn init(allocator: mem.Allocator) !Self {
58 var num_reader = num_decomp.reader(); 67 var num_reader = num_decomp.reader();
59 68
60 const num_stage_1_len: u16 = try num_reader.readInt(u16, endian); 69 const num_stage_1_len: u16 = try num_reader.readInt(u16, endian);
61 self.num_s1 = try allocator.alloc(u16, num_stage_1_len); 70 props.num_s1 = try allocator.alloc(u16, num_stage_1_len);
62 errdefer allocator.free(self.num_s1); 71 errdefer allocator.free(props.num_s1);
63 for (0..num_stage_1_len) |i| self.num_s1[i] = try num_reader.readInt(u16, endian); 72 for (0..num_stage_1_len) |i| props.num_s1[i] = try num_reader.readInt(u16, endian);
64 73
65 const num_stage_2_len: u16 = try num_reader.readInt(u16, endian); 74 const num_stage_2_len: u16 = try num_reader.readInt(u16, endian);
66 self.num_s2 = try allocator.alloc(u8, num_stage_2_len); 75 props.num_s2 = try allocator.alloc(u8, num_stage_2_len);
67 errdefer allocator.free(self.num_s2); 76 errdefer allocator.free(props.num_s2);
68 _ = try num_reader.readAll(self.num_s2); 77 _ = try num_reader.readAll(props.num_s2);
69
70 return self;
71} 78}
72 79
73pub fn deinit(self: *const Self, allocator: mem.Allocator) void { 80pub fn deinit(self: *const Properties, allocator: Allocator) void {
74 allocator.free(self.core_s1); 81 allocator.free(self.core_s1);
75 allocator.free(self.core_s2); 82 allocator.free(self.core_s2);
76 allocator.free(self.props_s1); 83 allocator.free(self.props_s1);
@@ -80,62 +87,62 @@ pub fn deinit(self: *const Self, allocator: mem.Allocator) void {
80} 87}
81 88
82/// True if `cp` is a mathematical symbol. 89/// True if `cp` is a mathematical symbol.
83pub fn isMath(self: Self, cp: u21) bool { 90pub fn isMath(self: Properties, cp: u21) bool {
84 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; 91 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 1 == 1;
85} 92}
86 93
87/// True if `cp` is an alphabetic character. 94/// True if `cp` is an alphabetic character.
88pub fn isAlphabetic(self: Self, cp: u21) bool { 95pub fn isAlphabetic(self: Properties, cp: u21) bool {
89 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; 96 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 2 == 2;
90} 97}
91 98
92/// True if `cp` is a valid identifier start character. 99/// True if `cp` is a valid identifier start character.
93pub fn isIdStart(self: Self, cp: u21) bool { 100pub fn isIdStart(self: Properties, cp: u21) bool {
94 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; 101 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
95} 102}
96 103
97/// True if `cp` is a valid identifier continuation character. 104/// True if `cp` is a valid identifier continuation character.
98pub fn isIdContinue(self: Self, cp: u21) bool { 105pub fn isIdContinue(self: Properties, cp: u21) bool {
99 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 8 == 8; 106 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 8 == 8;
100} 107}
101 108
102/// True if `cp` is a valid extended identifier start character. 109/// True if `cp` is a valid extended identifier start character.
103pub fn isXidStart(self: Self, cp: u21) bool { 110pub fn isXidStart(self: Properties, cp: u21) bool {
104 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 16 == 16; 111 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 16 == 16;
105} 112}
106 113
107/// True if `cp` is a valid extended identifier continuation character. 114/// True if `cp` is a valid extended identifier continuation character.
108pub fn isXidContinue(self: Self, cp: u21) bool { 115pub fn isXidContinue(self: Properties, cp: u21) bool {
109 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 32 == 32; 116 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 32 == 32;
110} 117}
111 118
112/// True if `cp` is a whitespace character. 119/// True if `cp` is a whitespace character.
113pub fn isWhitespace(self: Self, cp: u21) bool { 120pub fn isWhitespace(self: Properties, cp: u21) bool {
114 return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; 121 return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 1 == 1;
115} 122}
116 123
117/// True if `cp` is a hexadecimal digit. 124/// True if `cp` is a hexadecimal digit.
118pub fn isHexDigit(self: Self, cp: u21) bool { 125pub fn isHexDigit(self: Properties, cp: u21) bool {
119 return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; 126 return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 2 == 2;
120} 127}
121 128
122/// True if `cp` is a diacritic mark. 129/// True if `cp` is a diacritic mark.
123pub fn isDiacritic(self: Self, cp: u21) bool { 130pub fn isDiacritic(self: Properties, cp: u21) bool {
124 return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; 131 return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
125} 132}
126 133
127/// True if `cp` is numeric. 134/// True if `cp` is numeric.
128pub fn isNumeric(self: Self, cp: u21) bool { 135pub fn isNumeric(self: Properties, cp: u21) bool {
129 return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; 136 return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 1 == 1;
130} 137}
131 138
132/// True if `cp` is a digit. 139/// True if `cp` is a digit.
133pub fn isDigit(self: Self, cp: u21) bool { 140pub fn isDigit(self: Properties, cp: u21) bool {
134 return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; 141 return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 2 == 2;
135} 142}
136 143
137/// True if `cp` is decimal. 144/// True if `cp` is decimal.
138pub fn isDecimal(self: Self, cp: u21) bool { 145pub fn isDecimal(self: Properties, cp: u21) bool {
139 return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; 146 return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
140} 147}
141 148
@@ -161,3 +168,10 @@ test "Props" {
161 try testing.expect(!self.isDigit('2')); 168 try testing.expect(!self.isDigit('2'));
162 try testing.expect(!self.isDecimal('g')); 169 try testing.expect(!self.isDecimal('g'));
163} 170}
171
172const std = @import("std");
173const builtin = @import("builtin");
174const compress = std.compress;
175const mem = std.mem;
176const Allocator = mem.Allocator;
177const testing = std.testing;
diff --git a/src/Scripts.zig b/src/Scripts.zig
index 4ad8549..f71a2b5 100644
--- a/src/Scripts.zig
+++ b/src/Scripts.zig
@@ -1,10 +1,10 @@
1const std = @import("std"); 1//! Scripts Module
2const builtin = @import("builtin"); 2
3const compress = std.compress; 3s1: []u16 = undefined,
4const mem = std.mem; 4s2: []u8 = undefined,
5const testing = std.testing; 5s3: []u8 = undefined,
6 6
7/// Scripts 7/// Scripts enum
8pub const Script = enum { 8pub const Script = enum {
9 none, 9 none,
10 Adlam, 10 Adlam,
@@ -172,13 +172,24 @@ pub const Script = enum {
172 Zanabazar_Square, 172 Zanabazar_Square,
173}; 173};
174 174
175s1: []u16 = undefined, 175const Scripts = @This();
176s2: []u8 = undefined,
177s3: []u8 = undefined,
178 176
179const Self = @This(); 177pub fn init(allocator: Allocator) Allocator.Error!Scripts {
178 var scripts = Scripts{};
179 try scripts.setup(allocator);
180 return scripts;
181}
182
183pub fn setup(scripts: *Scripts, allocator: Allocator) Allocator.Error!void {
184 scripts.setupInner(allocator) catch |err| {
185 switch (err) {
186 error.OutOfMemory => |e| return e,
187 else => unreachable,
188 }
189 };
190}
180 191
181pub fn init(allocator: mem.Allocator) !Self { 192inline fn setupInner(scripts: *Scripts, allocator: mem.Allocator) !void {
182 const decompressor = compress.flate.inflate.decompressor; 193 const decompressor = compress.flate.inflate.decompressor;
183 const in_bytes = @embedFile("scripts"); 194 const in_bytes = @embedFile("scripts");
184 var in_fbs = std.io.fixedBufferStream(in_bytes); 195 var in_fbs = std.io.fixedBufferStream(in_bytes);
@@ -187,34 +198,30 @@ pub fn init(allocator: mem.Allocator) !Self {
187 198
188 const endian = builtin.cpu.arch.endian(); 199 const endian = builtin.cpu.arch.endian();
189 200
190 var self = Self{};
191
192 const s1_len: u16 = try reader.readInt(u16, endian); 201 const s1_len: u16 = try reader.readInt(u16, endian);
193 self.s1 = try allocator.alloc(u16, s1_len); 202 scripts.s1 = try allocator.alloc(u16, s1_len);
194 errdefer allocator.free(self.s1); 203 errdefer allocator.free(scripts.s1);
195 for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian); 204 for (0..s1_len) |i| scripts.s1[i] = try reader.readInt(u16, endian);
196 205
197 const s2_len: u16 = try reader.readInt(u16, endian); 206 const s2_len: u16 = try reader.readInt(u16, endian);
198 self.s2 = try allocator.alloc(u8, s2_len); 207 scripts.s2 = try allocator.alloc(u8, s2_len);
199 errdefer allocator.free(self.s2); 208 errdefer allocator.free(scripts.s2);
200 _ = try reader.readAll(self.s2); 209 _ = try reader.readAll(scripts.s2);
201 210
202 const s3_len: u16 = try reader.readInt(u8, endian); 211 const s3_len: u16 = try reader.readInt(u8, endian);
203 self.s3 = try allocator.alloc(u8, s3_len); 212 scripts.s3 = try allocator.alloc(u8, s3_len);
204 errdefer allocator.free(self.s3); 213 errdefer allocator.free(scripts.s3);
205 _ = try reader.readAll(self.s3); 214 _ = try reader.readAll(scripts.s3);
206
207 return self;
208} 215}
209 216
210pub fn deinit(self: *const Self, allocator: mem.Allocator) void { 217pub fn deinit(self: *const Scripts, allocator: mem.Allocator) void {
211 allocator.free(self.s1); 218 allocator.free(self.s1);
212 allocator.free(self.s2); 219 allocator.free(self.s2);
213 allocator.free(self.s3); 220 allocator.free(self.s3);
214} 221}
215 222
216/// Lookup the Script type for `cp`. 223/// Lookup the Script type for `cp`.
217pub fn script(self: Self, cp: u21) ?Script { 224pub fn script(self: Scripts, cp: u21) ?Script {
218 const byte = self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]; 225 const byte = self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]];
219 if (byte == 0) return null; 226 if (byte == 0) return null;
220 return @enumFromInt(byte); 227 return @enumFromInt(byte);
@@ -225,3 +232,10 @@ test "script" {
225 defer self.deinit(std.testing.allocator); 232 defer self.deinit(std.testing.allocator);
226 try testing.expectEqual(Script.Latin, self.script('A').?); 233 try testing.expectEqual(Script.Latin, self.script('A').?);
227} 234}
235
236const std = @import("std");
237const builtin = @import("builtin");
238const compress = std.compress;
239const mem = std.mem;
240const Allocator = mem.Allocator;
241const testing = std.testing;