summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sam Atman, 2025-04-30 16:48:07 -0400
committer: Sam Atman, 2025-04-30 16:48:07 -0400
commit: d2d42bf3ef5490f6fdec73508c2493a666ecee41 (patch)
tree: 377794be59ece4118ca2449b705b8e7cc646abc0
parent: Update README.md to new API (diff)
download: zg-d2d42bf3ef5490f6fdec73508c2493a666ecee41.tar.gz
zg-d2d42bf3ef5490f6fdec73508c2493a666ecee41.tar.xz
zg-d2d42bf3ef5490f6fdec73508c2493a666ecee41.zip
Setup variants for all allocating modules
This harmonizes the allocating modules in a couple of ways. All can now be constructed by pointer, and all treat various miscellaneous read failures as `unreachable`, which indeed they should be. The README has been updated to inform users of this option.
-rw-r--r-- README.md 20
-rw-r--r-- src/CaseFolding.zig 18
-rw-r--r-- src/GeneralCategories.zig 59
-rw-r--r-- src/Graphemes.zig 32
-rw-r--r-- src/LetterCasing.zig 62
-rw-r--r-- src/Normalize.zig 39
-rw-r--r-- src/Properties.zig 98
-rw-r--r-- src/Scripts.zig 66
8 files changed, 248 insertions, 146 deletions
diff --git a/README.md b/README.md
index 80e544f..538a38d 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,24 @@ zg is a modular library. This approach minimizes binary file size and memory
31requirements by only including the Unicode data required for the specified module. 31requirements by only including the Unicode data required for the specified module.
32The following sections describe the various modules and their specific use case. 32The following sections describe the various modules and their specific use case.
33 33
34### Init and Setup
35
36The code examples will show the use of `Module.init(allocator)` to create the
37various modules. All of the allocating modules have a `setup` variant, which
38takes a pointer and allocates in-place.
39
40Example use:
41
42```zig
43test "Setup form" {
44 var graphemes = try allocator.create(Graphemes);
45 defer allocator.destroy(graphemes);
46 try graphemes.setup(allocator);
47 defer graphemes.deinit(allocator);
48}
49```
50
51
34## Code Points 52## Code Points
35 53
36In the `code_point` module, you'll find a data structure representing a single code 54In the `code_point` module, you'll find a data structure representing a single code
@@ -386,6 +404,8 @@ test "Initialize With a Normalize" {
386 defer case_fold.deinit(allocator); 404 defer case_fold.deinit(allocator);
387} 405}
388``` 406```
407This has a `setupWithNormalize` variant as well, but note that this also takes
408a `Normalize` struct, and not a pointer to it.
389 409
390 410
391## Display Width of Characters and Strings 411## Display Width of Characters and Strings
diff --git a/src/CaseFolding.zig b/src/CaseFolding.zig
index 162e82f..2e53bfa 100644
--- a/src/CaseFolding.zig
+++ b/src/CaseFolding.zig
@@ -11,20 +11,21 @@ owns_normalize: bool,
11 11
12const CaseFolding = @This(); 12const CaseFolding = @This();
13 13
14pub fn init(allocator: Allocator) !CaseFolding { 14pub fn init(allocator: Allocator) Allocator.Error!CaseFolding {
15 var case_fold: CaseFolding = undefined; 15 var case_fold: CaseFolding = undefined;
16 try case_fold.setup(allocator); 16 try case_fold.setup(allocator);
17 return case_fold; 17 return case_fold;
18} 18}
19 19
20pub fn initWithNormalize(allocator: Allocator, norm: Normalize) !CaseFolding { 20pub fn initWithNormalize(allocator: Allocator, norm: Normalize) Allocator.Error!CaseFolding {
21 var casefold: CaseFolding = undefined; 21 var casefold: CaseFolding = undefined;
22 try casefold.setupWithNormalize(allocator, norm); 22 try casefold.setupWithNormalize(allocator, norm);
23 return casefold; 23 return casefold;
24} 24}
25 25
26pub fn setup(casefold: *CaseFolding, allocator: Allocator) !void { 26pub fn setup(casefold: *CaseFolding, allocator: Allocator) Allocator.Error!void {
27 try casefold.setupImpl(allocator); 27 try casefold.setupImpl(allocator);
28 // Handle normalize memory separately during setup:
28 casefold.owns_normalize = false; 29 casefold.owns_normalize = false;
29 errdefer casefold.deinit(allocator); 30 errdefer casefold.deinit(allocator);
30 try casefold.normalize.setup(allocator); 31 try casefold.normalize.setup(allocator);
@@ -37,7 +38,16 @@ pub fn setupWithNormalize(casefold: *CaseFolding, allocator: Allocator, norm: No
37 casefold.owns_normalize = false; 38 casefold.owns_normalize = false;
38} 39}
39 40
40fn setupImpl(casefold: *CaseFolding, allocator: Allocator) !void { 41fn setupImpl(casefold: *CaseFolding, allocator: Allocator) Allocator.Error!void {
42 casefold.setupImplInner(allocator) catch |err| {
43 switch (err) {
44 error.OutOfMemory => |e| return e,
45 else => unreachable,
46 }
47 };
48}
49
50inline fn setupImplInner(casefold: *CaseFolding, allocator: Allocator) !void {
41 const decompressor = compress.flate.inflate.decompressor; 51 const decompressor = compress.flate.inflate.decompressor;
42 const in_bytes = @embedFile("fold"); 52 const in_bytes = @embedFile("fold");
43 var in_fbs = std.io.fixedBufferStream(in_bytes); 53 var in_fbs = std.io.fixedBufferStream(in_bytes);
diff --git a/src/GeneralCategories.zig b/src/GeneralCategories.zig
index a69f7a2..b7c82c0 100644
--- a/src/GeneralCategories.zig
+++ b/src/GeneralCategories.zig
@@ -1,7 +1,8 @@
1const std = @import("std"); 1//! General Categories
2const builtin = @import("builtin"); 2
3const compress = std.compress; 3s1: []u16 = undefined,
4const mem = std.mem; 4s2: []u5 = undefined,
5s3: []u5 = undefined,
5 6
6/// General Category 7/// General Category
7pub const Gc = enum { 8pub const Gc = enum {
@@ -37,13 +38,15 @@ pub const Gc = enum {
37 Zs, // Separator, Space 38 Zs, // Separator, Space
38}; 39};
39 40
40s1: []u16 = undefined, 41const GeneralCategories = @This();
41s2: []u5 = undefined,
42s3: []u5 = undefined,
43 42
44const Self = @This(); 43pub fn init(allocator: Allocator) Allocator.Error!GeneralCategories {
44 var gencat = GeneralCategories{};
45 try gencat.setup(allocator);
46 return gencat;
47}
45 48
46pub fn init(allocator: mem.Allocator) !Self { 49pub fn setup(self: *GeneralCategories, allocator: Allocator) Allocator.Error!void {
47 const decompressor = compress.flate.inflate.decompressor; 50 const decompressor = compress.flate.inflate.decompressor;
48 const in_bytes = @embedFile("gencat"); 51 const in_bytes = @embedFile("gencat");
49 var in_fbs = std.io.fixedBufferStream(in_bytes); 52 var in_fbs = std.io.fixedBufferStream(in_bytes);
@@ -52,39 +55,35 @@ pub fn init(allocator: mem.Allocator) !Self {
52 55
53 const endian = builtin.cpu.arch.endian(); 56 const endian = builtin.cpu.arch.endian();
54 57
55 var self = Self{}; 58 const s1_len: u16 = reader.readInt(u16, endian) catch unreachable;
56
57 const s1_len: u16 = try reader.readInt(u16, endian);
58 self.s1 = try allocator.alloc(u16, s1_len); 59 self.s1 = try allocator.alloc(u16, s1_len);
59 errdefer allocator.free(self.s1); 60 errdefer allocator.free(self.s1);
60 for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian); 61 for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian);
61 62
62 const s2_len: u16 = try reader.readInt(u16, endian); 63 const s2_len: u16 = reader.readInt(u16, endian) catch unreachable;
63 self.s2 = try allocator.alloc(u5, s2_len); 64 self.s2 = try allocator.alloc(u5, s2_len);
64 errdefer allocator.free(self.s2); 65 errdefer allocator.free(self.s2);
65 for (0..s2_len) |i| self.s2[i] = @intCast(try reader.readInt(u8, endian)); 66 for (0..s2_len) |i| self.s2[i] = @intCast(reader.readInt(u8, endian) catch unreachable);
66 67
67 const s3_len: u16 = try reader.readInt(u8, endian); 68 const s3_len: u16 = reader.readInt(u8, endian) catch unreachable;
68 self.s3 = try allocator.alloc(u5, s3_len); 69 self.s3 = try allocator.alloc(u5, s3_len);
69 errdefer allocator.free(self.s3); 70 errdefer allocator.free(self.s3);
70 for (0..s3_len) |i| self.s3[i] = @intCast(try reader.readInt(u8, endian)); 71 for (0..s3_len) |i| self.s3[i] = @intCast(reader.readInt(u8, endian) catch unreachable);
71
72 return self;
73} 72}
74 73
75pub fn deinit(self: *const Self, allocator: mem.Allocator) void { 74pub fn deinit(self: *const GeneralCategories, allocator: mem.Allocator) void {
76 allocator.free(self.s1); 75 allocator.free(self.s1);
77 allocator.free(self.s2); 76 allocator.free(self.s2);
78 allocator.free(self.s3); 77 allocator.free(self.s3);
79} 78}
80 79
81/// Lookup the General Category for `cp`. 80/// Lookup the General Category for `cp`.
82pub fn gc(self: Self, cp: u21) Gc { 81pub fn gc(self: GeneralCategories, cp: u21) Gc {
83 return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]); 82 return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]);
84} 83}
85 84
86/// True if `cp` has an C general category. 85/// True if `cp` has an C general category.
87pub fn isControl(self: Self, cp: u21) bool { 86pub fn isControl(self: GeneralCategories, cp: u21) bool {
88 return switch (self.gc(cp)) { 87 return switch (self.gc(cp)) {
89 .Cc, 88 .Cc,
90 .Cf, 89 .Cf,
@@ -97,7 +96,7 @@ pub fn isControl(self: Self, cp: u21) bool {
97} 96}
98 97
99/// True if `cp` has an L general category. 98/// True if `cp` has an L general category.
100pub fn isLetter(self: Self, cp: u21) bool { 99pub fn isLetter(self: GeneralCategories, cp: u21) bool {
101 return switch (self.gc(cp)) { 100 return switch (self.gc(cp)) {
102 .Ll, 101 .Ll,
103 .Lm, 102 .Lm,
@@ -110,7 +109,7 @@ pub fn isLetter(self: Self, cp: u21) bool {
110} 109}
111 110
112/// True if `cp` has an M general category. 111/// True if `cp` has an M general category.
113pub fn isMark(self: Self, cp: u21) bool { 112pub fn isMark(self: GeneralCategories, cp: u21) bool {
114 return switch (self.gc(cp)) { 113 return switch (self.gc(cp)) {
115 .Mc, 114 .Mc,
116 .Me, 115 .Me,
@@ -121,7 +120,7 @@ pub fn isMark(self: Self, cp: u21) bool {
121} 120}
122 121
123/// True if `cp` has an N general category. 122/// True if `cp` has an N general category.
124pub fn isNumber(self: Self, cp: u21) bool { 123pub fn isNumber(self: GeneralCategories, cp: u21) bool {
125 return switch (self.gc(cp)) { 124 return switch (self.gc(cp)) {
126 .Nd, 125 .Nd,
127 .Nl, 126 .Nl,
@@ -132,7 +131,7 @@ pub fn isNumber(self: Self, cp: u21) bool {
132} 131}
133 132
134/// True if `cp` has an P general category. 133/// True if `cp` has an P general category.
135pub fn isPunctuation(self: Self, cp: u21) bool { 134pub fn isPunctuation(self: GeneralCategories, cp: u21) bool {
136 return switch (self.gc(cp)) { 135 return switch (self.gc(cp)) {
137 .Pc, 136 .Pc,
138 .Pd, 137 .Pd,
@@ -147,7 +146,7 @@ pub fn isPunctuation(self: Self, cp: u21) bool {
147} 146}
148 147
149/// True if `cp` has an S general category. 148/// True if `cp` has an S general category.
150pub fn isSymbol(self: Self, cp: u21) bool { 149pub fn isSymbol(self: GeneralCategories, cp: u21) bool {
151 return switch (self.gc(cp)) { 150 return switch (self.gc(cp)) {
152 .Sc, 151 .Sc,
153 .Sk, 152 .Sk,
@@ -159,7 +158,7 @@ pub fn isSymbol(self: Self, cp: u21) bool {
159} 158}
160 159
161/// True if `cp` has an Z general category. 160/// True if `cp` has an Z general category.
162pub fn isSeparator(self: Self, cp: u21) bool { 161pub fn isSeparator(self: GeneralCategories, cp: u21) bool {
163 return switch (self.gc(cp)) { 162 return switch (self.gc(cp)) {
164 .Zl, 163 .Zl,
165 .Zp, 164 .Zp,
@@ -168,3 +167,9 @@ pub fn isSeparator(self: Self, cp: u21) bool {
168 else => false, 167 else => false,
169 }; 168 };
170} 169}
170
171const std = @import("std");
172const builtin = @import("builtin");
173const compress = std.compress;
174const mem = std.mem;
175const Allocator = mem.Allocator;
diff --git a/src/Graphemes.zig b/src/Graphemes.zig
index 79cd2c6..7bf328a 100644
--- a/src/Graphemes.zig
+++ b/src/Graphemes.zig
@@ -14,7 +14,13 @@ s3: []u8 = undefined,
14 14
15const Graphemes = @This(); 15const Graphemes = @This();
16 16
17pub inline fn init(allocator: mem.Allocator) mem.Allocator.Error!Graphemes { 17pub fn init(allocator: Allocator) Allocator.Error!Graphemes {
18 var graphemes = Graphemes{};
19 try graphemes.setup(allocator);
20 return graphemes;
21}
22
23pub fn setup(graphemes: *Graphemes, allocator: Allocator) Allocator.Error!void {
18 const decompressor = compress.flate.inflate.decompressor; 24 const decompressor = compress.flate.inflate.decompressor;
19 const in_bytes = @embedFile("gbp"); 25 const in_bytes = @embedFile("gbp");
20 var in_fbs = std.io.fixedBufferStream(in_bytes); 26 var in_fbs = std.io.fixedBufferStream(in_bytes);
@@ -23,27 +29,23 @@ pub inline fn init(allocator: mem.Allocator) mem.Allocator.Error!Graphemes {
23 29
24 const endian = builtin.cpu.arch.endian(); 30 const endian = builtin.cpu.arch.endian();
25 31
26 var self = Graphemes{};
27
28 const s1_len: u16 = reader.readInt(u16, endian) catch unreachable; 32 const s1_len: u16 = reader.readInt(u16, endian) catch unreachable;
29 self.s1 = try allocator.alloc(u16, s1_len); 33 graphemes.s1 = try allocator.alloc(u16, s1_len);
30 errdefer allocator.free(self.s1); 34 errdefer allocator.free(graphemes.s1);
31 for (0..s1_len) |i| self.s1[i] = reader.readInt(u16, endian) catch unreachable; 35 for (0..s1_len) |i| graphemes.s1[i] = reader.readInt(u16, endian) catch unreachable;
32 36
33 const s2_len: u16 = reader.readInt(u16, endian) catch unreachable; 37 const s2_len: u16 = reader.readInt(u16, endian) catch unreachable;
34 self.s2 = try allocator.alloc(u16, s2_len); 38 graphemes.s2 = try allocator.alloc(u16, s2_len);
35 errdefer allocator.free(self.s2); 39 errdefer allocator.free(graphemes.s2);
36 for (0..s2_len) |i| self.s2[i] = reader.readInt(u16, endian) catch unreachable; 40 for (0..s2_len) |i| graphemes.s2[i] = reader.readInt(u16, endian) catch unreachable;
37 41
38 const s3_len: u16 = reader.readInt(u16, endian) catch unreachable; 42 const s3_len: u16 = reader.readInt(u16, endian) catch unreachable;
39 self.s3 = try allocator.alloc(u8, s3_len); 43 graphemes.s3 = try allocator.alloc(u8, s3_len);
40 errdefer allocator.free(self.s3); 44 errdefer allocator.free(graphemes.s3);
41 _ = reader.readAll(self.s3) catch unreachable; 45 _ = reader.readAll(graphemes.s3) catch unreachable;
42
43 return self;
44} 46}
45 47
46pub fn deinit(graphemes: *const Graphemes, allocator: mem.Allocator) void { 48pub fn deinit(graphemes: *const Graphemes, allocator: Allocator) void {
47 allocator.free(graphemes.s1); 49 allocator.free(graphemes.s1);
48 allocator.free(graphemes.s2); 50 allocator.free(graphemes.s2);
49 allocator.free(graphemes.s3); 51 allocator.free(graphemes.s3);
diff --git a/src/LetterCasing.zig b/src/LetterCasing.zig
index 0a0acb1..a7260b8 100644
--- a/src/LetterCasing.zig
+++ b/src/LetterCasing.zig
@@ -1,25 +1,31 @@
1const std = @import("std");
2const builtin = @import("builtin");
3const compress = std.compress;
4const mem = std.mem;
5const testing = std.testing;
6const unicode = std.unicode;
7
8const CodePointIterator = @import("code_point").Iterator; 1const CodePointIterator = @import("code_point").Iterator;
9 2
10case_map: [][2]u21, 3case_map: [][2]u21 = undefined,
11prop_s1: []u16 = undefined, 4prop_s1: []u16 = undefined,
12prop_s2: []u8 = undefined, 5prop_s2: []u8 = undefined,
13 6
14const Self = @This(); 7const LetterCasing = @This();
8
9pub fn init(allocator: Allocator) Allocator.Error!LetterCasing {
10 var case = LetterCasing{};
11 try case.setup(allocator);
12 return case;
13}
15 14
16pub fn init(allocator: mem.Allocator) !Self { 15pub fn setup(case: *LetterCasing, allocator: Allocator) Allocator.Error!void {
16 case.setupInner(allocator) catch |err| {
17 switch (err) {
18 error.OutOfMemory => |e| return e,
19 else => unreachable,
20 }
21 };
22}
23
24inline fn setupInner(self: *LetterCasing, allocator: mem.Allocator) !void {
17 const decompressor = compress.flate.inflate.decompressor; 25 const decompressor = compress.flate.inflate.decompressor;
18 const endian = builtin.cpu.arch.endian(); 26 const endian = builtin.cpu.arch.endian();
19 27
20 var self = Self{ 28 self.case_map = try allocator.alloc([2]u21, 0x110000);
21 .case_map = try allocator.alloc([2]u21, 0x110000),
22 };
23 errdefer allocator.free(self.case_map); 29 errdefer allocator.free(self.case_map);
24 30
25 for (0..0x110000) |i| { 31 for (0..0x110000) |i| {
@@ -68,28 +74,26 @@ pub fn init(allocator: mem.Allocator) !Self {
68 self.prop_s2 = try allocator.alloc(u8, stage_2_len); 74 self.prop_s2 = try allocator.alloc(u8, stage_2_len);
69 errdefer allocator.free(self.prop_s2); 75 errdefer allocator.free(self.prop_s2);
70 _ = try cp_reader.readAll(self.prop_s2); 76 _ = try cp_reader.readAll(self.prop_s2);
71
72 return self;
73} 77}
74 78
75pub fn deinit(self: *const Self, allocator: mem.Allocator) void { 79pub fn deinit(self: *const LetterCasing, allocator: mem.Allocator) void {
76 allocator.free(self.case_map); 80 allocator.free(self.case_map);
77 allocator.free(self.prop_s1); 81 allocator.free(self.prop_s1);
78 allocator.free(self.prop_s2); 82 allocator.free(self.prop_s2);
79} 83}
80 84
81// Returns true if `cp` is either upper, lower, or title case. 85// Returns true if `cp` is either upper, lower, or title case.
82pub fn isCased(self: Self, cp: u21) bool { 86pub fn isCased(self: LetterCasing, cp: u21) bool {
83 return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; 87 return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
84} 88}
85 89
86// Returns true if `cp` is uppercase. 90// Returns true if `cp` is uppercase.
87pub fn isUpper(self: Self, cp: u21) bool { 91pub fn isUpper(self: LetterCasing, cp: u21) bool {
88 return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; 92 return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 2 == 2;
89} 93}
90 94
91/// Returns true if `str` is all uppercase. 95/// Returns true if `str` is all uppercase.
92pub fn isUpperStr(self: Self, str: []const u8) bool { 96pub fn isUpperStr(self: LetterCasing, str: []const u8) bool {
93 var iter = CodePointIterator{ .bytes = str }; 97 var iter = CodePointIterator{ .bytes = str };
94 98
95 return while (iter.next()) |cp| { 99 return while (iter.next()) |cp| {
@@ -107,14 +111,14 @@ test "isUpperStr" {
107} 111}
108 112
109/// Returns uppercase mapping for `cp`. 113/// Returns uppercase mapping for `cp`.
110pub fn toUpper(self: Self, cp: u21) u21 { 114pub fn toUpper(self: LetterCasing, cp: u21) u21 {
111 return self.case_map[cp][0]; 115 return self.case_map[cp][0];
112} 116}
113 117
114/// Returns a new string with all letters in uppercase. 118/// Returns a new string with all letters in uppercase.
115/// Caller must free returned bytes with `allocator`. 119/// Caller must free returned bytes with `allocator`.
116pub fn toUpperStr( 120pub fn toUpperStr(
117 self: Self, 121 self: LetterCasing,
118 allocator: mem.Allocator, 122 allocator: mem.Allocator,
119 str: []const u8, 123 str: []const u8,
120) ![]u8 { 124) ![]u8 {
@@ -142,12 +146,12 @@ test "toUpperStr" {
142} 146}
143 147
144// Returns true if `cp` is lowercase. 148// Returns true if `cp` is lowercase.
145pub fn isLower(self: Self, cp: u21) bool { 149pub fn isLower(self: LetterCasing, cp: u21) bool {
146 return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; 150 return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 1 == 1;
147} 151}
148 152
149/// Returns true if `str` is all lowercase. 153/// Returns true if `str` is all lowercase.
150pub fn isLowerStr(self: Self, str: []const u8) bool { 154pub fn isLowerStr(self: LetterCasing, str: []const u8) bool {
151 var iter = CodePointIterator{ .bytes = str }; 155 var iter = CodePointIterator{ .bytes = str };
152 156
153 return while (iter.next()) |cp| { 157 return while (iter.next()) |cp| {
@@ -165,14 +169,14 @@ test "isLowerStr" {
165} 169}
166 170
167/// Returns lowercase mapping for `cp`. 171/// Returns lowercase mapping for `cp`.
168pub fn toLower(self: Self, cp: u21) u21 { 172pub fn toLower(self: LetterCasing, cp: u21) u21 {
169 return self.case_map[cp][1]; 173 return self.case_map[cp][1];
170} 174}
171 175
172/// Returns a new string with all letters in lowercase. 176/// Returns a new string with all letters in lowercase.
173/// Caller must free returned bytes with `allocator`. 177/// Caller must free returned bytes with `allocator`.
174pub fn toLowerStr( 178pub fn toLowerStr(
175 self: Self, 179 self: LetterCasing,
176 allocator: mem.Allocator, 180 allocator: mem.Allocator,
177 str: []const u8, 181 str: []const u8,
178) ![]u8 { 182) ![]u8 {
@@ -198,3 +202,11 @@ test "toLowerStr" {
198 defer testing.allocator.free(lowered); 202 defer testing.allocator.free(lowered);
199 try testing.expectEqualStrings("hello, world 2112!", lowered); 203 try testing.expectEqualStrings("hello, world 2112!", lowered);
200} 204}
205
206const std = @import("std");
207const builtin = @import("builtin");
208const compress = std.compress;
209const mem = std.mem;
210const Allocator = std.mem.Allocator;
211const testing = std.testing;
212const unicode = std.unicode;
diff --git a/src/Normalize.zig b/src/Normalize.zig
index d8c867d..1500b4c 100644
--- a/src/Normalize.zig
+++ b/src/Normalize.zig
@@ -10,22 +10,47 @@ normp_data: NormPropsData = undefined,
10 10
11const Normalize = @This(); 11const Normalize = @This();
12 12
13pub fn init(allocator: Allocator) !Normalize { 13pub fn init(allocator: Allocator) Allocator.Error!Normalize {
14 var norm: Normalize = undefined; 14 var norm: Normalize = undefined;
15 try norm.setup(allocator); 15 try norm.setup(allocator);
16 return norm; 16 return norm;
17} 17}
18 18
19pub fn setup(self: *Normalize, allocator: Allocator) !void { 19pub fn setup(self: *Normalize, allocator: Allocator) Allocator.Error!void {
20 self.canon_data = try CanonData.init(allocator); 20 self.canon_data = CanonData.init(allocator) catch |err| {
21 switch (err) {
22 error.OutOfMemory => |e| return e,
23 else => unreachable,
24 }
25 };
21 errdefer self.canon_data.deinit(allocator); 26 errdefer self.canon_data.deinit(allocator);
22 self.ccc_data = try CccData.init(allocator); 27 self.ccc_data = CccData.init(allocator) catch |err| {
28 switch (err) {
29 error.OutOfMemory => |e| return e,
30 else => unreachable,
31 }
32 };
23 errdefer self.ccc_data.deinit(allocator); 33 errdefer self.ccc_data.deinit(allocator);
24 self.compat_data = try CompatData.init(allocator); 34 self.compat_data = CompatData.init(allocator) catch |err| {
35 switch (err) {
36 error.OutOfMemory => |e| return e,
37 else => unreachable,
38 }
39 };
25 errdefer self.compat_data.deinit(allocator); 40 errdefer self.compat_data.deinit(allocator);
26 self.hangul_data = try HangulData.init(allocator); 41 self.hangul_data = HangulData.init(allocator) catch |err| {
42 switch (err) {
43 error.OutOfMemory => |e| return e,
44 else => unreachable,
45 }
46 };
27 errdefer self.hangul_data.deinit(allocator); 47 errdefer self.hangul_data.deinit(allocator);
28 self.normp_data = try NormPropsData.init(allocator); 48 self.normp_data = NormPropsData.init(allocator) catch |err| {
49 switch (err) {
50 error.OutOfMemory => |e| return e,
51 else => unreachable,
52 }
53 };
29} 54}
30 55
31pub fn deinit(norm: *const Normalize, allocator: Allocator) void { 56pub fn deinit(norm: *const Normalize, allocator: Allocator) void {
diff --git a/src/Properties.zig b/src/Properties.zig
index 46920be..f7e57ec 100644
--- a/src/Properties.zig
+++ b/src/Properties.zig
@@ -1,8 +1,4 @@
1const std = @import("std"); 1//! Properties module
2const builtin = @import("builtin");
3const compress = std.compress;
4const mem = std.mem;
5const testing = std.testing;
6 2
7core_s1: []u16 = undefined, 3core_s1: []u16 = undefined,
8core_s2: []u8 = undefined, 4core_s2: []u8 = undefined,
@@ -11,9 +7,24 @@ props_s2: []u8 = undefined,
11num_s1: []u16 = undefined, 7num_s1: []u16 = undefined,
12num_s2: []u8 = undefined, 8num_s2: []u8 = undefined,
13 9
14const Self = @This(); 10const Properties = @This();
11
12pub fn init(allocator: Allocator) Allocator.Error!Properties {
13 var props = Properties{};
14 try props.setup(allocator);
15 return props;
16}
17
18pub fn setup(props: *Properties, allocator: Allocator) Allocator.Error!void {
19 props.setupInner(allocator) catch |err| {
20 switch (err) {
21 error.OutOfMemory => |e| return e,
22 else => unreachable,
23 }
24 };
25}
15 26
16pub fn init(allocator: mem.Allocator) !Self { 27inline fn setupInner(props: *Properties, allocator: Allocator) !void {
17 const decompressor = compress.flate.inflate.decompressor; 28 const decompressor = compress.flate.inflate.decompressor;
18 const endian = builtin.cpu.arch.endian(); 29 const endian = builtin.cpu.arch.endian();
19 30
@@ -23,17 +34,15 @@ pub fn init(allocator: mem.Allocator) !Self {
23 var core_decomp = decompressor(.raw, core_fbs.reader()); 34 var core_decomp = decompressor(.raw, core_fbs.reader());
24 var core_reader = core_decomp.reader(); 35 var core_reader = core_decomp.reader();
25 36
26 var self = Self{};
27
28 const core_stage_1_len: u16 = try core_reader.readInt(u16, endian); 37 const core_stage_1_len: u16 = try core_reader.readInt(u16, endian);
29 self.core_s1 = try allocator.alloc(u16, core_stage_1_len); 38 props.core_s1 = try allocator.alloc(u16, core_stage_1_len);
30 errdefer allocator.free(self.core_s1); 39 errdefer allocator.free(props.core_s1);
31 for (0..core_stage_1_len) |i| self.core_s1[i] = try core_reader.readInt(u16, endian); 40 for (0..core_stage_1_len) |i| props.core_s1[i] = try core_reader.readInt(u16, endian);
32 41
33 const core_stage_2_len: u16 = try core_reader.readInt(u16, endian); 42 const core_stage_2_len: u16 = try core_reader.readInt(u16, endian);
34 self.core_s2 = try allocator.alloc(u8, core_stage_2_len); 43 props.core_s2 = try allocator.alloc(u8, core_stage_2_len);
35 errdefer allocator.free(self.core_s2); 44 errdefer allocator.free(props.core_s2);
36 _ = try core_reader.readAll(self.core_s2); 45 _ = try core_reader.readAll(props.core_s2);
37 46
38 // Process PropList.txt 47 // Process PropList.txt
39 const props_bytes = @embedFile("props"); 48 const props_bytes = @embedFile("props");
@@ -42,14 +51,14 @@ pub fn init(allocator: mem.Allocator) !Self {
42 var props_reader = props_decomp.reader(); 51 var props_reader = props_decomp.reader();
43 52
44 const stage_1_len: u16 = try props_reader.readInt(u16, endian); 53 const stage_1_len: u16 = try props_reader.readInt(u16, endian);
45 self.props_s1 = try allocator.alloc(u16, stage_1_len); 54 props.props_s1 = try allocator.alloc(u16, stage_1_len);
46 errdefer allocator.free(self.props_s1); 55 errdefer allocator.free(props.props_s1);
47 for (0..stage_1_len) |i| self.props_s1[i] = try props_reader.readInt(u16, endian); 56 for (0..stage_1_len) |i| props.props_s1[i] = try props_reader.readInt(u16, endian);
48 57
49 const stage_2_len: u16 = try props_reader.readInt(u16, endian); 58 const stage_2_len: u16 = try props_reader.readInt(u16, endian);
50 self.props_s2 = try allocator.alloc(u8, stage_2_len); 59 props.props_s2 = try allocator.alloc(u8, stage_2_len);
51 errdefer allocator.free(self.props_s2); 60 errdefer allocator.free(props.props_s2);
52 _ = try props_reader.readAll(self.props_s2); 61 _ = try props_reader.readAll(props.props_s2);
53 62
54 // Process DerivedNumericType.txt 63 // Process DerivedNumericType.txt
55 const num_bytes = @embedFile("numeric"); 64 const num_bytes = @embedFile("numeric");
@@ -58,19 +67,17 @@ pub fn init(allocator: mem.Allocator) !Self {
58 var num_reader = num_decomp.reader(); 67 var num_reader = num_decomp.reader();
59 68
60 const num_stage_1_len: u16 = try num_reader.readInt(u16, endian); 69 const num_stage_1_len: u16 = try num_reader.readInt(u16, endian);
61 self.num_s1 = try allocator.alloc(u16, num_stage_1_len); 70 props.num_s1 = try allocator.alloc(u16, num_stage_1_len);
62 errdefer allocator.free(self.num_s1); 71 errdefer allocator.free(props.num_s1);
63 for (0..num_stage_1_len) |i| self.num_s1[i] = try num_reader.readInt(u16, endian); 72 for (0..num_stage_1_len) |i| props.num_s1[i] = try num_reader.readInt(u16, endian);
64 73
65 const num_stage_2_len: u16 = try num_reader.readInt(u16, endian); 74 const num_stage_2_len: u16 = try num_reader.readInt(u16, endian);
66 self.num_s2 = try allocator.alloc(u8, num_stage_2_len); 75 props.num_s2 = try allocator.alloc(u8, num_stage_2_len);
67 errdefer allocator.free(self.num_s2); 76 errdefer allocator.free(props.num_s2);
68 _ = try num_reader.readAll(self.num_s2); 77 _ = try num_reader.readAll(props.num_s2);
69
70 return self;
71} 78}
72 79
73pub fn deinit(self: *const Self, allocator: mem.Allocator) void { 80pub fn deinit(self: *const Properties, allocator: Allocator) void {
74 allocator.free(self.core_s1); 81 allocator.free(self.core_s1);
75 allocator.free(self.core_s2); 82 allocator.free(self.core_s2);
76 allocator.free(self.props_s1); 83 allocator.free(self.props_s1);
@@ -80,62 +87,62 @@ pub fn deinit(self: *const Self, allocator: mem.Allocator) void {
80} 87}
81 88
82/// True if `cp` is a mathematical symbol. 89/// True if `cp` is a mathematical symbol.
83pub fn isMath(self: Self, cp: u21) bool { 90pub fn isMath(self: Properties, cp: u21) bool {
84 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; 91 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 1 == 1;
85} 92}
86 93
87/// True if `cp` is an alphabetic character. 94/// True if `cp` is an alphabetic character.
88pub fn isAlphabetic(self: Self, cp: u21) bool { 95pub fn isAlphabetic(self: Properties, cp: u21) bool {
89 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; 96 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 2 == 2;
90} 97}
91 98
92/// True if `cp` is a valid identifier start character. 99/// True if `cp` is a valid identifier start character.
93pub fn isIdStart(self: Self, cp: u21) bool { 100pub fn isIdStart(self: Properties, cp: u21) bool {
94 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; 101 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
95} 102}
96 103
97/// True if `cp` is a valid identifier continuation character. 104/// True if `cp` is a valid identifier continuation character.
98pub fn isIdContinue(self: Self, cp: u21) bool { 105pub fn isIdContinue(self: Properties, cp: u21) bool {
99 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 8 == 8; 106 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 8 == 8;
100} 107}
101 108
102/// True if `cp` is a valid extended identifier start character. 109/// True if `cp` is a valid extended identifier start character.
103pub fn isXidStart(self: Self, cp: u21) bool { 110pub fn isXidStart(self: Properties, cp: u21) bool {
104 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 16 == 16; 111 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 16 == 16;
105} 112}
106 113
107/// True if `cp` is a valid extended identifier continuation character. 114/// True if `cp` is a valid extended identifier continuation character.
108pub fn isXidContinue(self: Self, cp: u21) bool { 115pub fn isXidContinue(self: Properties, cp: u21) bool {
109 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 32 == 32; 116 return self.core_s2[self.core_s1[cp >> 8] + (cp & 0xff)] & 32 == 32;
110} 117}
111 118
112/// True if `cp` is a whitespace character. 119/// True if `cp` is a whitespace character.
113pub fn isWhitespace(self: Self, cp: u21) bool { 120pub fn isWhitespace(self: Properties, cp: u21) bool {
114 return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; 121 return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 1 == 1;
115} 122}
116 123
117/// True if `cp` is a hexadecimal digit. 124/// True if `cp` is a hexadecimal digit.
118pub fn isHexDigit(self: Self, cp: u21) bool { 125pub fn isHexDigit(self: Properties, cp: u21) bool {
119 return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; 126 return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 2 == 2;
120} 127}
121 128
122/// True if `cp` is a diacritic mark. 129/// True if `cp` is a diacritic mark.
123pub fn isDiacritic(self: Self, cp: u21) bool { 130pub fn isDiacritic(self: Properties, cp: u21) bool {
124 return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; 131 return self.props_s2[self.props_s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
125} 132}
126 133
127/// True if `cp` is numeric. 134/// True if `cp` is numeric.
128pub fn isNumeric(self: Self, cp: u21) bool { 135pub fn isNumeric(self: Properties, cp: u21) bool {
129 return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; 136 return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 1 == 1;
130} 137}
131 138
132/// True if `cp` is a digit. 139/// True if `cp` is a digit.
133pub fn isDigit(self: Self, cp: u21) bool { 140pub fn isDigit(self: Properties, cp: u21) bool {
134 return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; 141 return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 2 == 2;
135} 142}
136 143
137/// True if `cp` is decimal. 144/// True if `cp` is decimal.
138pub fn isDecimal(self: Self, cp: u21) bool { 145pub fn isDecimal(self: Properties, cp: u21) bool {
139 return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; 146 return self.num_s2[self.num_s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
140} 147}
141 148
@@ -161,3 +168,10 @@ test "Props" {
161 try testing.expect(!self.isDigit('2')); 168 try testing.expect(!self.isDigit('2'));
162 try testing.expect(!self.isDecimal('g')); 169 try testing.expect(!self.isDecimal('g'));
163} 170}
171
172const std = @import("std");
173const builtin = @import("builtin");
174const compress = std.compress;
175const mem = std.mem;
176const Allocator = mem.Allocator;
177const testing = std.testing;
diff --git a/src/Scripts.zig b/src/Scripts.zig
index 4ad8549..f71a2b5 100644
--- a/src/Scripts.zig
+++ b/src/Scripts.zig
@@ -1,10 +1,10 @@
1const std = @import("std"); 1//! Scripts Module
2const builtin = @import("builtin"); 2
3const compress = std.compress; 3s1: []u16 = undefined,
4const mem = std.mem; 4s2: []u8 = undefined,
5const testing = std.testing; 5s3: []u8 = undefined,
6 6
7/// Scripts 7/// Scripts enum
8pub const Script = enum { 8pub const Script = enum {
9 none, 9 none,
10 Adlam, 10 Adlam,
@@ -172,13 +172,24 @@ pub const Script = enum {
172 Zanabazar_Square, 172 Zanabazar_Square,
173}; 173};
174 174
175s1: []u16 = undefined, 175const Scripts = @This();
176s2: []u8 = undefined,
177s3: []u8 = undefined,
178 176
179const Self = @This(); 177pub fn init(allocator: Allocator) Allocator.Error!Scripts {
178 var scripts = Scripts{};
179 try scripts.setup(allocator);
180 return scripts;
181}
182
183pub fn setup(scripts: *Scripts, allocator: Allocator) Allocator.Error!void {
184 scripts.setupInner(allocator) catch |err| {
185 switch (err) {
186 error.OutOfMemory => |e| return e,
187 else => unreachable,
188 }
189 };
190}
180 191
181pub fn init(allocator: mem.Allocator) !Self { 192inline fn setupInner(scripts: *Scripts, allocator: mem.Allocator) !void {
182 const decompressor = compress.flate.inflate.decompressor; 193 const decompressor = compress.flate.inflate.decompressor;
183 const in_bytes = @embedFile("scripts"); 194 const in_bytes = @embedFile("scripts");
184 var in_fbs = std.io.fixedBufferStream(in_bytes); 195 var in_fbs = std.io.fixedBufferStream(in_bytes);
@@ -187,34 +198,30 @@ pub fn init(allocator: mem.Allocator) !Self {
187 198
188 const endian = builtin.cpu.arch.endian(); 199 const endian = builtin.cpu.arch.endian();
189 200
190 var self = Self{};
191
192 const s1_len: u16 = try reader.readInt(u16, endian); 201 const s1_len: u16 = try reader.readInt(u16, endian);
193 self.s1 = try allocator.alloc(u16, s1_len); 202 scripts.s1 = try allocator.alloc(u16, s1_len);
194 errdefer allocator.free(self.s1); 203 errdefer allocator.free(scripts.s1);
195 for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian); 204 for (0..s1_len) |i| scripts.s1[i] = try reader.readInt(u16, endian);
196 205
197 const s2_len: u16 = try reader.readInt(u16, endian); 206 const s2_len: u16 = try reader.readInt(u16, endian);
198 self.s2 = try allocator.alloc(u8, s2_len); 207 scripts.s2 = try allocator.alloc(u8, s2_len);
199 errdefer allocator.free(self.s2); 208 errdefer allocator.free(scripts.s2);
200 _ = try reader.readAll(self.s2); 209 _ = try reader.readAll(scripts.s2);
201 210
202 const s3_len: u16 = try reader.readInt(u8, endian); 211 const s3_len: u16 = try reader.readInt(u8, endian);
203 self.s3 = try allocator.alloc(u8, s3_len); 212 scripts.s3 = try allocator.alloc(u8, s3_len);
204 errdefer allocator.free(self.s3); 213 errdefer allocator.free(scripts.s3);
205 _ = try reader.readAll(self.s3); 214 _ = try reader.readAll(scripts.s3);
206
207 return self;
208} 215}
209 216
210pub fn deinit(self: *const Self, allocator: mem.Allocator) void { 217pub fn deinit(self: *const Scripts, allocator: mem.Allocator) void {
211 allocator.free(self.s1); 218 allocator.free(self.s1);
212 allocator.free(self.s2); 219 allocator.free(self.s2);
213 allocator.free(self.s3); 220 allocator.free(self.s3);
214} 221}
215 222
216/// Lookup the Script type for `cp`. 223/// Lookup the Script type for `cp`.
217pub fn script(self: Self, cp: u21) ?Script { 224pub fn script(self: Scripts, cp: u21) ?Script {
218 const byte = self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]; 225 const byte = self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]];
219 if (byte == 0) return null; 226 if (byte == 0) return null;
220 return @enumFromInt(byte); 227 return @enumFromInt(byte);
@@ -225,3 +232,10 @@ test "script" {
225 defer self.deinit(std.testing.allocator); 232 defer self.deinit(std.testing.allocator);
226 try testing.expectEqual(Script.Latin, self.script('A').?); 233 try testing.expectEqual(Script.Latin, self.script('A').?);
227} 234}
235
236const std = @import("std");
237const builtin = @import("builtin");
238const compress = std.compress;
239const mem = std.mem;
240const Allocator = mem.Allocator;
241const testing = std.testing;