summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.md4
-rw-r--r--build.zig209
-rw-r--r--build.zig.zon4
-rw-r--r--codegen/canon.zig27
-rw-r--r--codegen/case_prop.zig37
-rw-r--r--codegen/ccc.zig37
-rw-r--r--codegen/compat.zig29
-rw-r--r--codegen/core_props.zig38
-rw-r--r--codegen/dwp.zig50
-rw-r--r--codegen/fold.zig64
-rw-r--r--codegen/gbp.zig71
-rw-r--r--codegen/gencat.zig41
-rw-r--r--codegen/hangul.zig38
-rw-r--r--codegen/lower.zig29
-rw-r--r--codegen/normp.zig38
-rw-r--r--codegen/numeric.zig37
-rw-r--r--codegen/props.zig38
-rw-r--r--codegen/scripts.zig46
-rw-r--r--codegen/upper.zig31
-rw-r--r--codegen/wbp.zig39
-rw-r--r--src/CanonData.zig4
-rw-r--r--src/CaseFolding.zig6
-rw-r--r--src/CombiningData.zig5
-rw-r--r--src/CompatData.zig5
-rw-r--r--src/DisplayWidth.zig31
-rw-r--r--src/GeneralCategories.zig4
-rw-r--r--src/Graphemes.zig4
-rw-r--r--src/HangulData.zig5
-rw-r--r--src/LetterCasing.zig14
-rw-r--r--src/NormPropsData.zig5
-rw-r--r--src/Normalize.zig10
-rw-r--r--src/Properties.zig10
-rw-r--r--src/Scripts.zig5
-rw-r--r--src/Words.zig4
-rw-r--r--src/unicode_tests.zig94
35 files changed, 557 insertions, 556 deletions
diff --git a/README.md b/README.md
index 3abe480..6ba456f 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@ The Unicode version supported by zg is `16.0.0`.
10 10
11## Zig Version 11## Zig Version
12 12
13The minimum Zig version required is `0.14`. 13The minimum Zig version required is `0.15.2`.
14 14
15 15
16## Integrating zg into your Zig Project 16## Integrating zg into your Zig Project
@@ -19,7 +19,7 @@ You first need to add zg as a dependency in your `build.zig.zon` file. In your
19Zig project's root directory, run: 19Zig project's root directory, run:
20 20
21```plain 21```plain
22zig fetch --save https://codeberg.org/atman/zg/archive/v0.14.1.tar.gz 22zig fetch --save https://codeberg.org/atman/zg/archive/v0.15.3.tar.gz
23``` 23```
24 24
25Then instantiate the dependency in your `build.zig`: 25Then instantiate the dependency in your `build.zig`:
diff --git a/build.zig b/build.zig
index ca0eeef..5678cd1 100644
--- a/build.zig
+++ b/build.zig
@@ -42,28 +42,40 @@ pub fn build(b: *std.Build) void {
42 // Grapheme break 42 // Grapheme break
43 const gbp_gen_exe = b.addExecutable(.{ 43 const gbp_gen_exe = b.addExecutable(.{
44 .name = "gbp", 44 .name = "gbp",
45 .root_source_file = b.path("codegen/gbp.zig"), 45 .root_module = b.createModule(.{
46 .target = b.graph.host, 46 .root_source_file = b.path("codegen/gbp.zig"),
47 .optimize = .Debug, 47 .target = b.graph.host,
48 }); 48 .optimize = .Debug,
49 }),
50 });
51 gbp_gen_exe.root_module.addAnonymousImport("DerivedCoreProperties.txt", .{ .root_source_file = b.path("data/unicode/DerivedCoreProperties.txt") });
52 gbp_gen_exe.root_module.addAnonymousImport("GraphemeBreakProperty.txt", .{ .root_source_file = b.path("data/unicode/auxiliary/GraphemeBreakProperty.txt") });
53 gbp_gen_exe.root_module.addAnonymousImport("emoji-data.txt", .{ .root_source_file = b.path("data/unicode/emoji/emoji-data.txt") });
49 const run_gbp_gen_exe = b.addRunArtifact(gbp_gen_exe); 54 const run_gbp_gen_exe = b.addRunArtifact(gbp_gen_exe);
50 const gbp_gen_out = run_gbp_gen_exe.addOutputFileArg("gbp.bin.z"); 55 const gbp_gen_out = run_gbp_gen_exe.addOutputFileArg("gbp.bin.z");
51 56
52 const wbp_gen_exe = b.addExecutable(.{ 57 const wbp_gen_exe = b.addExecutable(.{
53 .name = "wbp", 58 .name = "wbp",
54 .root_source_file = b.path("codegen/wbp.zig"), 59 .root_module = b.createModule(.{
55 .target = b.graph.host, 60 .root_source_file = b.path("codegen/wbp.zig"),
56 .optimize = .Debug, 61 .target = b.graph.host,
62 .optimize = .Debug,
63 }),
57 }); 64 });
65 wbp_gen_exe.root_module.addAnonymousImport("WordBreakProperty.txt", .{ .root_source_file = b.path("data/unicode/auxiliary/WordBreakProperty.txt") });
58 const run_wbp_gen_exe = b.addRunArtifact(wbp_gen_exe); 66 const run_wbp_gen_exe = b.addRunArtifact(wbp_gen_exe);
59 const wbp_gen_out = run_wbp_gen_exe.addOutputFileArg("wbp.bin.z"); 67 const wbp_gen_out = run_wbp_gen_exe.addOutputFileArg("wbp.bin.z");
60 68
61 const dwp_gen_exe = b.addExecutable(.{ 69 const dwp_gen_exe = b.addExecutable(.{
62 .name = "dwp", 70 .name = "dwp",
63 .root_source_file = b.path("codegen/dwp.zig"), 71 .root_module = b.createModule(.{
64 .target = b.graph.host, 72 .root_source_file = b.path("codegen/dwp.zig"),
65 .optimize = .Debug, 73 .target = b.graph.host,
66 }); 74 .optimize = .Debug,
75 }),
76 });
77 dwp_gen_exe.root_module.addAnonymousImport("DerivedEastAsianWidth.txt", .{ .root_source_file = b.path("data/unicode/extracted/DerivedEastAsianWidth.txt") });
78 dwp_gen_exe.root_module.addAnonymousImport("DerivedGeneralCategory.txt", .{ .root_source_file = b.path("data/unicode/extracted/DerivedGeneralCategory.txt") });
67 dwp_gen_exe.root_module.addOptions("options", dwp_options); 79 dwp_gen_exe.root_module.addOptions("options", dwp_options);
68 const run_dwp_gen_exe = b.addRunArtifact(dwp_gen_exe); 80 const run_dwp_gen_exe = b.addRunArtifact(dwp_gen_exe);
69 const dwp_gen_out = run_dwp_gen_exe.addOutputFileArg("dwp.bin.z"); 81 const dwp_gen_out = run_dwp_gen_exe.addOutputFileArg("dwp.bin.z");
@@ -71,131 +83,175 @@ pub fn build(b: *std.Build) void {
71 // Normalization properties 83 // Normalization properties
72 const canon_gen_exe = b.addExecutable(.{ 84 const canon_gen_exe = b.addExecutable(.{
73 .name = "canon", 85 .name = "canon",
74 .root_source_file = b.path("codegen/canon.zig"), 86 .root_module = b.createModule(.{
75 .target = b.graph.host, 87 .root_source_file = b.path("codegen/canon.zig"),
76 .optimize = .Debug, 88 .target = b.graph.host,
89 .optimize = .Debug,
90 }),
77 }); 91 });
92 canon_gen_exe.root_module.addAnonymousImport("UnicodeData.txt", .{ .root_source_file = b.path("data/unicode/UnicodeData.txt") });
78 const run_canon_gen_exe = b.addRunArtifact(canon_gen_exe); 93 const run_canon_gen_exe = b.addRunArtifact(canon_gen_exe);
79 const canon_gen_out = run_canon_gen_exe.addOutputFileArg("canon.bin.z"); 94 const canon_gen_out = run_canon_gen_exe.addOutputFileArg("canon.bin.z");
80 95
81 const compat_gen_exe = b.addExecutable(.{ 96 const compat_gen_exe = b.addExecutable(.{
82 .name = "compat", 97 .name = "compat",
83 .root_source_file = b.path("codegen/compat.zig"), 98 .root_module = b.createModule(.{
84 .target = b.graph.host, 99 .root_source_file = b.path("codegen/compat.zig"),
85 .optimize = .Debug, 100 .target = b.graph.host,
101 .optimize = .Debug,
102 }),
86 }); 103 });
104 compat_gen_exe.root_module.addAnonymousImport("UnicodeData.txt", .{ .root_source_file = b.path("data/unicode/UnicodeData.txt") });
87 const run_compat_gen_exe = b.addRunArtifact(compat_gen_exe); 105 const run_compat_gen_exe = b.addRunArtifact(compat_gen_exe);
88 const compat_gen_out = run_compat_gen_exe.addOutputFileArg("compat.bin.z"); 106 const compat_gen_out = run_compat_gen_exe.addOutputFileArg("compat.bin.z");
89 107
90 const hangul_gen_exe = b.addExecutable(.{ 108 const hangul_gen_exe = b.addExecutable(.{
91 .name = "hangul", 109 .name = "hangul",
92 .root_source_file = b.path("codegen/hangul.zig"), 110 .root_module = b.createModule(.{
93 .target = b.graph.host, 111 .root_source_file = b.path("codegen/hangul.zig"),
94 .optimize = .Debug, 112 .target = b.graph.host,
113 .optimize = .Debug,
114 }),
95 }); 115 });
116 hangul_gen_exe.root_module.addAnonymousImport("HangulSyllableType.txt", .{ .root_source_file = b.path("data/unicode/HangulSyllableType.txt") });
96 const run_hangul_gen_exe = b.addRunArtifact(hangul_gen_exe); 117 const run_hangul_gen_exe = b.addRunArtifact(hangul_gen_exe);
97 const hangul_gen_out = run_hangul_gen_exe.addOutputFileArg("hangul.bin.z"); 118 const hangul_gen_out = run_hangul_gen_exe.addOutputFileArg("hangul.bin.z");
98 119
99 const normp_gen_exe = b.addExecutable(.{ 120 const normp_gen_exe = b.addExecutable(.{
100 .name = "normp", 121 .name = "normp",
101 .root_source_file = b.path("codegen/normp.zig"), 122 .root_module = b.createModule(.{
102 .target = b.graph.host, 123 .root_source_file = b.path("codegen/normp.zig"),
103 .optimize = .Debug, 124 .target = b.graph.host,
125 .optimize = .Debug,
126 }),
104 }); 127 });
128 normp_gen_exe.root_module.addAnonymousImport("DerivedNormalizationProps.txt", .{ .root_source_file = b.path("data/unicode/DerivedNormalizationProps.txt") });
105 const run_normp_gen_exe = b.addRunArtifact(normp_gen_exe); 129 const run_normp_gen_exe = b.addRunArtifact(normp_gen_exe);
106 const normp_gen_out = run_normp_gen_exe.addOutputFileArg("normp.bin.z"); 130 const normp_gen_out = run_normp_gen_exe.addOutputFileArg("normp.bin.z");
107 131
108 const ccc_gen_exe = b.addExecutable(.{ 132 const ccc_gen_exe = b.addExecutable(.{
109 .name = "ccc", 133 .name = "ccc",
110 .root_source_file = b.path("codegen/ccc.zig"), 134 .root_module = b.createModule(.{
111 .target = b.graph.host, 135 .root_source_file = b.path("codegen/ccc.zig"),
112 .optimize = .Debug, 136 .target = b.graph.host,
137 .optimize = .Debug,
138 }),
113 }); 139 });
140 ccc_gen_exe.root_module.addAnonymousImport("DerivedCombiningClass.txt", .{ .root_source_file = b.path("data/unicode/extracted/DerivedCombiningClass.txt") });
114 const run_ccc_gen_exe = b.addRunArtifact(ccc_gen_exe); 141 const run_ccc_gen_exe = b.addRunArtifact(ccc_gen_exe);
115 const ccc_gen_out = run_ccc_gen_exe.addOutputFileArg("ccc.bin.z"); 142 const ccc_gen_out = run_ccc_gen_exe.addOutputFileArg("ccc.bin.z");
116 143
117 const gencat_gen_exe = b.addExecutable(.{ 144 const gencat_gen_exe = b.addExecutable(.{
118 .name = "gencat", 145 .name = "gencat",
119 .root_source_file = b.path("codegen/gencat.zig"), 146 .root_module = b.createModule(.{
120 .target = b.graph.host, 147 .root_source_file = b.path("codegen/gencat.zig"),
121 .optimize = .Debug, 148 .target = b.graph.host,
149 .optimize = .Debug,
150 }),
122 }); 151 });
152 gencat_gen_exe.root_module.addAnonymousImport("DerivedGeneralCategory.txt", .{ .root_source_file = b.path("data/unicode/extracted/DerivedGeneralCategory.txt") });
123 const run_gencat_gen_exe = b.addRunArtifact(gencat_gen_exe); 153 const run_gencat_gen_exe = b.addRunArtifact(gencat_gen_exe);
124 const gencat_gen_out = run_gencat_gen_exe.addOutputFileArg("gencat.bin.z"); 154 const gencat_gen_out = run_gencat_gen_exe.addOutputFileArg("gencat.bin.z");
125 155
126 const fold_gen_exe = b.addExecutable(.{ 156 const fold_gen_exe = b.addExecutable(.{
127 .name = "fold", 157 .name = "fold",
128 .root_source_file = b.path("codegen/fold.zig"), 158 .root_module = b.createModule(.{
129 .target = b.graph.host, 159 .root_source_file = b.path("codegen/fold.zig"),
130 .optimize = .Debug, 160 .target = b.graph.host,
131 }); 161 .optimize = .Debug,
162 }),
163 });
164 fold_gen_exe.root_module.addAnonymousImport("DerivedCoreProperties.txt", .{ .root_source_file = b.path("data/unicode/DerivedCoreProperties.txt") });
165 fold_gen_exe.root_module.addAnonymousImport("CaseFolding.txt", .{ .root_source_file = b.path("data/unicode/CaseFolding.txt") });
132 const run_fold_gen_exe = b.addRunArtifact(fold_gen_exe); 166 const run_fold_gen_exe = b.addRunArtifact(fold_gen_exe);
133 const fold_gen_out = run_fold_gen_exe.addOutputFileArg("fold.bin.z"); 167 const fold_gen_out = run_fold_gen_exe.addOutputFileArg("fold.bin.z");
134 168
135 // Numeric types 169 // Numeric types
136 const num_gen_exe = b.addExecutable(.{ 170 const num_gen_exe = b.addExecutable(.{
137 .name = "numeric", 171 .name = "numeric",
138 .root_source_file = b.path("codegen/numeric.zig"), 172 .root_module = b.createModule(.{
139 .target = b.graph.host, 173 .root_source_file = b.path("codegen/numeric.zig"),
140 .optimize = .Debug, 174 .target = b.graph.host,
175 .optimize = .Debug,
176 }),
141 }); 177 });
178 num_gen_exe.root_module.addAnonymousImport("DerivedNumericType.txt", .{ .root_source_file = b.path("data/unicode/extracted/DerivedNumericType.txt") });
142 const run_num_gen_exe = b.addRunArtifact(num_gen_exe); 179 const run_num_gen_exe = b.addRunArtifact(num_gen_exe);
143 const num_gen_out = run_num_gen_exe.addOutputFileArg("numeric.bin.z"); 180 const num_gen_out = run_num_gen_exe.addOutputFileArg("numeric.bin.z");
144 181
145 // Letter case properties 182 // Letter case properties
146 const case_prop_gen_exe = b.addExecutable(.{ 183 const case_prop_gen_exe = b.addExecutable(.{
147 .name = "case_prop", 184 .name = "case_prop",
148 .root_source_file = b.path("codegen/case_prop.zig"), 185 .root_module = b.createModule(.{
149 .target = b.graph.host, 186 .root_source_file = b.path("codegen/case_prop.zig"),
150 .optimize = .Debug, 187 .target = b.graph.host,
188 .optimize = .Debug,
189 }),
151 }); 190 });
191 case_prop_gen_exe.root_module.addAnonymousImport("DerivedCoreProperties.txt", .{ .root_source_file = b.path("data/unicode/DerivedCoreProperties.txt") });
152 const run_case_prop_gen_exe = b.addRunArtifact(case_prop_gen_exe); 192 const run_case_prop_gen_exe = b.addRunArtifact(case_prop_gen_exe);
153 const case_prop_gen_out = run_case_prop_gen_exe.addOutputFileArg("case_prop.bin.z"); 193 const case_prop_gen_out = run_case_prop_gen_exe.addOutputFileArg("case_prop.bin.z");
154 194
155 // Uppercase mappings 195 // Uppercase mappings
156 const upper_gen_exe = b.addExecutable(.{ 196 const upper_gen_exe = b.addExecutable(.{
157 .name = "upper", 197 .name = "upper",
158 .root_source_file = b.path("codegen/upper.zig"), 198 .root_module = b.createModule(.{
159 .target = b.graph.host, 199 .root_source_file = b.path("codegen/upper.zig"),
160 .optimize = .Debug, 200 .target = b.graph.host,
201 .optimize = .Debug,
202 }),
161 }); 203 });
204 upper_gen_exe.root_module.addAnonymousImport("UnicodeData.txt", .{ .root_source_file = b.path("data/unicode/UnicodeData.txt") });
162 const run_upper_gen_exe = b.addRunArtifact(upper_gen_exe); 205 const run_upper_gen_exe = b.addRunArtifact(upper_gen_exe);
163 const upper_gen_out = run_upper_gen_exe.addOutputFileArg("upper.bin.z"); 206 const upper_gen_out = run_upper_gen_exe.addOutputFileArg("upper.bin.z");
164 207
165 // Lowercase mappings 208 // Lowercase mappings
166 const lower_gen_exe = b.addExecutable(.{ 209 const lower_gen_exe = b.addExecutable(.{
167 .name = "lower", 210 .name = "lower",
168 .root_source_file = b.path("codegen/lower.zig"), 211 .root_module = b.createModule(.{
169 .target = b.graph.host, 212 .root_source_file = b.path("codegen/lower.zig"),
170 .optimize = .Debug, 213 .target = b.graph.host,
214 .optimize = .Debug,
215 }),
171 }); 216 });
217 lower_gen_exe.root_module.addAnonymousImport("UnicodeData.txt", .{ .root_source_file = b.path("data/unicode/UnicodeData.txt") });
172 const run_lower_gen_exe = b.addRunArtifact(lower_gen_exe); 218 const run_lower_gen_exe = b.addRunArtifact(lower_gen_exe);
173 const lower_gen_out = run_lower_gen_exe.addOutputFileArg("lower.bin.z"); 219 const lower_gen_out = run_lower_gen_exe.addOutputFileArg("lower.bin.z");
174 220
175 const scripts_gen_exe = b.addExecutable(.{ 221 const scripts_gen_exe = b.addExecutable(.{
176 .name = "scripts", 222 .name = "scripts",
177 .root_source_file = b.path("codegen/scripts.zig"), 223 .root_module = b.createModule(.{
178 .target = b.graph.host, 224 .root_source_file = b.path("codegen/scripts.zig"),
179 .optimize = .Debug, 225 .target = b.graph.host,
226 .optimize = .Debug,
227 }),
180 }); 228 });
229 scripts_gen_exe.root_module.addAnonymousImport("Scripts.txt", .{ .root_source_file = b.path("data/unicode/Scripts.txt") });
181 const run_scripts_gen_exe = b.addRunArtifact(scripts_gen_exe); 230 const run_scripts_gen_exe = b.addRunArtifact(scripts_gen_exe);
182 const scripts_gen_out = run_scripts_gen_exe.addOutputFileArg("scripts.bin.z"); 231 const scripts_gen_out = run_scripts_gen_exe.addOutputFileArg("scripts.bin.z");
183 232
184 const core_gen_exe = b.addExecutable(.{ 233 const core_gen_exe = b.addExecutable(.{
185 .name = "core", 234 .name = "core",
186 .root_source_file = b.path("codegen/core_props.zig"), 235 .root_module = b.createModule(.{
187 .target = b.graph.host, 236 .root_source_file = b.path("codegen/core_props.zig"),
188 .optimize = .Debug, 237 .target = b.graph.host,
238 .optimize = .Debug,
239 }),
189 }); 240 });
241 core_gen_exe.root_module.addAnonymousImport("DerivedCoreProperties.txt", .{ .root_source_file = b.path("data/unicode/DerivedCoreProperties.txt") });
190 const run_core_gen_exe = b.addRunArtifact(core_gen_exe); 242 const run_core_gen_exe = b.addRunArtifact(core_gen_exe);
191 const core_gen_out = run_core_gen_exe.addOutputFileArg("core_props.bin.z"); 243 const core_gen_out = run_core_gen_exe.addOutputFileArg("core_props.bin.z");
192 244
193 const props_gen_exe = b.addExecutable(.{ 245 const props_gen_exe = b.addExecutable(.{
194 .name = "props", 246 .name = "props",
195 .root_source_file = b.path("codegen/props.zig"), 247 .root_module = b.createModule(.{
196 .target = b.graph.host, 248 .root_source_file = b.path("codegen/props.zig"),
197 .optimize = .Debug, 249 .target = b.graph.host,
250 .optimize = .Debug,
251 }),
198 }); 252 });
253
254 props_gen_exe.root_module.addAnonymousImport("PropList.txt", .{ .root_source_file = b.path("data/unicode/PropList.txt") });
199 const run_props_gen_exe = b.addRunArtifact(props_gen_exe); 255 const run_props_gen_exe = b.addRunArtifact(props_gen_exe);
200 const props_gen_out = run_props_gen_exe.addOutputFileArg("props.bin.z"); 256 const props_gen_out = run_props_gen_exe.addOutputFileArg("props.bin.z");
201 257
@@ -212,8 +268,6 @@ pub fn build(b: *std.Build) void {
212 const code_point_t = b.addTest(.{ 268 const code_point_t = b.addTest(.{
213 .name = "code_point", 269 .name = "code_point",
214 .root_module = code_point, 270 .root_module = code_point,
215 .target = target,
216 .optimize = optimize,
217 }); 271 });
218 const code_point_tr = b.addRunArtifact(code_point_t); 272 const code_point_tr = b.addRunArtifact(code_point_t);
219 273
@@ -230,8 +284,6 @@ pub fn build(b: *std.Build) void {
230 const grapheme_t = b.addTest(.{ 284 const grapheme_t = b.addTest(.{
231 .name = "Graphemes", 285 .name = "Graphemes",
232 .root_module = graphemes, 286 .root_module = graphemes,
233 .target = target,
234 .optimize = optimize,
235 }); 287 });
236 const grapheme_tr = b.addRunArtifact(grapheme_t); 288 const grapheme_tr = b.addRunArtifact(grapheme_t);
237 289
@@ -247,8 +299,6 @@ pub fn build(b: *std.Build) void {
247 const words_t = b.addTest(.{ 299 const words_t = b.addTest(.{
248 .name = "WordBreak", 300 .name = "WordBreak",
249 .root_module = words, 301 .root_module = words,
250 .target = target,
251 .optimize = optimize,
252 }); 302 });
253 const words_tr = b.addRunArtifact(words_t); 303 const words_tr = b.addRunArtifact(words_t);
254 304
@@ -262,8 +312,6 @@ pub fn build(b: *std.Build) void {
262 const ascii_t = b.addTest(.{ 312 const ascii_t = b.addTest(.{
263 .name = "ascii", 313 .name = "ascii",
264 .root_module = ascii, 314 .root_module = ascii,
265 .target = target,
266 .optimize = optimize,
267 }); 315 });
268 const ascii_tr = b.addRunArtifact(ascii_t); 316 const ascii_tr = b.addRunArtifact(ascii_t);
269 317
@@ -282,8 +330,6 @@ pub fn build(b: *std.Build) void {
282 const display_width_t = b.addTest(.{ 330 const display_width_t = b.addTest(.{
283 .name = "display_width", 331 .name = "display_width",
284 .root_module = display_width, 332 .root_module = display_width,
285 .target = target,
286 .optimize = optimize,
287 }); 333 });
288 const display_width_tr = b.addRunArtifact(display_width_t); 334 const display_width_tr = b.addRunArtifact(display_width_t);
289 335
@@ -298,8 +344,6 @@ pub fn build(b: *std.Build) void {
298 const ccc_data_t = b.addTest(.{ 344 const ccc_data_t = b.addTest(.{
299 .name = "ccc_data", 345 .name = "ccc_data",
300 .root_module = ccc_data, 346 .root_module = ccc_data,
301 .target = target,
302 .optimize = optimize,
303 }); 347 });
304 const ccc_data_tr = b.addRunArtifact(ccc_data_t); 348 const ccc_data_tr = b.addRunArtifact(ccc_data_t);
305 349
@@ -314,8 +358,6 @@ pub fn build(b: *std.Build) void {
314 const canon_data_t = b.addTest(.{ 358 const canon_data_t = b.addTest(.{
315 .name = "canon_data", 359 .name = "canon_data",
316 .root_module = canon_data, 360 .root_module = canon_data,
317 .target = target,
318 .optimize = optimize,
319 }); 361 });
320 const canon_data_tr = b.addRunArtifact(canon_data_t); 362 const canon_data_tr = b.addRunArtifact(canon_data_t);
321 363
@@ -330,8 +372,6 @@ pub fn build(b: *std.Build) void {
330 const compat_data_t = b.addTest(.{ 372 const compat_data_t = b.addTest(.{
331 .name = "compat_data", 373 .name = "compat_data",
332 .root_module = compat_data, 374 .root_module = compat_data,
333 .target = target,
334 .optimize = optimize,
335 }); 375 });
336 const compat_data_tr = b.addRunArtifact(compat_data_t); 376 const compat_data_tr = b.addRunArtifact(compat_data_t);
337 377
@@ -345,8 +385,6 @@ pub fn build(b: *std.Build) void {
345 const hangul_data_t = b.addTest(.{ 385 const hangul_data_t = b.addTest(.{
346 .name = "hangul_data", 386 .name = "hangul_data",
347 .root_module = hangul_data, 387 .root_module = hangul_data,
348 .target = target,
349 .optimize = optimize,
350 }); 388 });
351 const hangul_data_tr = b.addRunArtifact(hangul_data_t); 389 const hangul_data_tr = b.addRunArtifact(hangul_data_t);
352 390
@@ -360,8 +398,6 @@ pub fn build(b: *std.Build) void {
360 const normp_data_t = b.addTest(.{ 398 const normp_data_t = b.addTest(.{
361 .name = "normp_data", 399 .name = "normp_data",
362 .root_module = normp_data, 400 .root_module = normp_data,
363 .target = target,
364 .optimize = optimize,
365 }); 401 });
366 const normp_data_tr = b.addRunArtifact(normp_data_t); 402 const normp_data_tr = b.addRunArtifact(normp_data_t);
367 403
@@ -381,8 +417,6 @@ pub fn build(b: *std.Build) void {
381 const norm_t = b.addTest(.{ 417 const norm_t = b.addTest(.{
382 .name = "norm", 418 .name = "norm",
383 .root_module = norm, 419 .root_module = norm,
384 .target = target,
385 .optimize = optimize,
386 }); 420 });
387 const norm_tr = b.addRunArtifact(norm_t); 421 const norm_tr = b.addRunArtifact(norm_t);
388 422
@@ -397,8 +431,6 @@ pub fn build(b: *std.Build) void {
397 const gencat_t = b.addTest(.{ 431 const gencat_t = b.addTest(.{
398 .name = "gencat", 432 .name = "gencat",
399 .root_module = gencat, 433 .root_module = gencat,
400 .target = target,
401 .optimize = optimize,
402 }); 434 });
403 const gencat_tr = b.addRunArtifact(gencat_t); 435 const gencat_tr = b.addRunArtifact(gencat_t);
404 436
@@ -415,8 +447,6 @@ pub fn build(b: *std.Build) void {
415 const case_fold_t = b.addTest(.{ 447 const case_fold_t = b.addTest(.{
416 .name = "case_fold", 448 .name = "case_fold",
417 .root_module = case_fold, 449 .root_module = case_fold,
418 .target = target,
419 .optimize = optimize,
420 }); 450 });
421 const case_fold_tr = b.addRunArtifact(case_fold_t); 451 const case_fold_tr = b.addRunArtifact(case_fold_t);
422 452
@@ -434,8 +464,6 @@ pub fn build(b: *std.Build) void {
434 const letter_case_t = b.addTest(.{ 464 const letter_case_t = b.addTest(.{
435 .name = "lettercase", 465 .name = "lettercase",
436 .root_module = letter_case, 466 .root_module = letter_case,
437 .target = target,
438 .optimize = optimize,
439 }); 467 });
440 const letter_case_tr = b.addRunArtifact(letter_case_t); 468 const letter_case_tr = b.addRunArtifact(letter_case_t);
441 469
@@ -450,8 +478,6 @@ pub fn build(b: *std.Build) void {
450 const scripts_t = b.addTest(.{ 478 const scripts_t = b.addTest(.{
451 .name = "scripts", 479 .name = "scripts",
452 .root_module = scripts, 480 .root_module = scripts,
453 .target = target,
454 .optimize = optimize,
455 }); 481 });
456 const scripts_tr = b.addRunArtifact(scripts_t); 482 const scripts_tr = b.addRunArtifact(scripts_t);
457 483
@@ -468,17 +494,20 @@ pub fn build(b: *std.Build) void {
468 const properties_t = b.addTest(.{ 494 const properties_t = b.addTest(.{
469 .name = "properties", 495 .name = "properties",
470 .root_module = properties, 496 .root_module = properties,
471 .target = target,
472 .optimize = optimize,
473 }); 497 });
474 const properties_tr = b.addRunArtifact(properties_t); 498 const properties_tr = b.addRunArtifact(properties_t);
475 499
476 // Unicode Tests 500 // Unicode Tests
477 const unicode_tests = b.addTest(.{ 501 const unicode_tests = b.addTest(.{
478 .root_source_file = b.path("src/unicode_tests.zig"), 502 .root_module = b.createModule(.{
479 .target = target, 503 .root_source_file = b.path("src/unicode_tests.zig"),
480 .optimize = optimize, 504 .target = target,
481 }); 505 .optimize = optimize,
506 }),
507 });
508 unicode_tests.root_module.addAnonymousImport("GraphemeBreakTest.txt", .{ .root_source_file = b.path("data/unicode/auxiliary/GraphemeBreakTest.txt") });
509 unicode_tests.root_module.addAnonymousImport("NormalizationTest.txt", .{ .root_source_file = b.path("data/unicode/NormalizationTest.txt") });
510 unicode_tests.root_module.addAnonymousImport("WordBreakTest.txt", .{ .root_source_file = b.path("data/unicode/auxiliary/WordBreakTest.txt") });
482 unicode_tests.root_module.addImport("Graphemes", graphemes); 511 unicode_tests.root_module.addImport("Graphemes", graphemes);
483 unicode_tests.root_module.addImport("Normalize", norm); 512 unicode_tests.root_module.addImport("Normalize", norm);
484 unicode_tests.root_module.addImport("Words", words); 513 unicode_tests.root_module.addImport("Words", words);
diff --git a/build.zig.zon b/build.zig.zon
index 3e1df95..0308457 100644
--- a/build.zig.zon
+++ b/build.zig.zon
@@ -1,7 +1,7 @@
1.{ 1.{
2 .name = .zg, 2 .name = .zg,
3 .version = "0.14.1", 3 .version = "0.15.3",
4 .minimum_zig_version = "0.14.0", 4 .minimum_zig_version = "0.15.2",
5 .fingerprint = 0x47df7778dc946aa0, 5 .fingerprint = 0x47df7778dc946aa0,
6 6
7 .paths = .{ 7 .paths = .{
diff --git a/codegen/canon.zig b/codegen/canon.zig
index 28b7f28..d95a905 100644
--- a/codegen/canon.zig
+++ b/codegen/canon.zig
@@ -1,32 +1,27 @@
1const std = @import("std"); 1const std = @import("std");
2const builtin = @import("builtin"); 2const builtin = @import("builtin");
3 3
4pub fn main() !void { 4pub fn main() anyerror!void {
5 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); 5 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
6 defer arena.deinit(); 6 defer arena.deinit();
7 const allocator = arena.allocator(); 7 const allocator = arena.allocator();
8 8
9 var write_buf: [4096]u8 = undefined;
9 // Process UnicodeData.txt 10 // Process UnicodeData.txt
10 var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{}); 11 var in_reader = std.io.Reader.fixed(@embedFile("UnicodeData.txt"));
11 defer in_file.close();
12 var in_buf = std.io.bufferedReader(in_file.reader());
13 const in_reader = in_buf.reader();
14
15 var args_iter = try std.process.argsWithAllocator(allocator); 12 var args_iter = try std.process.argsWithAllocator(allocator);
16 defer args_iter.deinit(); 13 defer args_iter.deinit();
17 _ = args_iter.skip(); 14 _ = args_iter.skip();
18 const output_path = args_iter.next() orelse @panic("No output file arg!"); 15 const output_path = args_iter.next() orelse @panic("No output file arg!");
19 16
20 const compressor = std.compress.flate.deflate.compressor;
21 var out_file = try std.fs.cwd().createFile(output_path, .{}); 17 var out_file = try std.fs.cwd().createFile(output_path, .{});
22 defer out_file.close(); 18 defer out_file.close();
23 var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); 19 var file_writer = out_file.writer(&write_buf);
24 const writer = out_comp.writer(); 20 var writer = &file_writer.interface;
25
26 const endian = builtin.cpu.arch.endian(); 21 const endian = builtin.cpu.arch.endian();
27 var line_buf: [4096]u8 = undefined;
28 22
29 lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { 23 lines: while (in_reader.takeDelimiterInclusive('\n')) |took| {
24 const line = std.mem.trimRight(u8, took, "\n");
30 if (line.len == 0) continue; 25 if (line.len == 0) continue;
31 26
32 var field_iter = std.mem.splitScalar(u8, line, ';'); 27 var field_iter = std.mem.splitScalar(u8, line, ';');
@@ -60,8 +55,12 @@ pub fn main() !void {
60 55
61 try writer.writeInt(u8, @intCast(len), endian); 56 try writer.writeInt(u8, @intCast(len), endian);
62 for (cps[0..len]) |cp| try writer.writeInt(u24, cp, endian); 57 for (cps[0..len]) |cp| try writer.writeInt(u24, cp, endian);
58 } else |err| switch (err) {
59 error.EndOfStream => {},
60 else => {
61 return err;
62 },
63 } 63 }
64
65 try writer.writeInt(u16, 0, endian); 64 try writer.writeInt(u16, 0, endian);
66 try out_comp.flush(); 65 try writer.flush();
67} 66}
diff --git a/codegen/case_prop.zig b/codegen/case_prop.zig
index 6c912a8..613f7f6 100644
--- a/codegen/case_prop.zig
+++ b/codegen/case_prop.zig
@@ -22,7 +22,7 @@ const BlockMap = std.HashMap(
22 std.hash_map.default_max_load_percentage, 22 std.hash_map.default_max_load_percentage,
23); 23);
24 24
25pub fn main() !void { 25pub fn main() anyerror!void {
26 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); 26 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
27 defer arena.deinit(); 27 defer arena.deinit();
28 const allocator = arena.allocator(); 28 const allocator = arena.allocator();
@@ -30,15 +30,10 @@ pub fn main() !void {
30 var flat_map = std.AutoHashMap(u21, u8).init(allocator); 30 var flat_map = std.AutoHashMap(u21, u8).init(allocator);
31 defer flat_map.deinit(); 31 defer flat_map.deinit();
32 32
33 var line_buf: [4096]u8 = undefined;
34
35 // Process DerivedCoreProperties.txt 33 // Process DerivedCoreProperties.txt
36 var in_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{}); 34 var in_reader = std.io.Reader.fixed(@embedFile("DerivedCoreProperties.txt"));
37 defer in_file.close(); 35 while (in_reader.takeDelimiterInclusive('\n')) |took| {
38 var in_buf = std.io.bufferedReader(in_file.reader()); 36 const line = std.mem.trimRight(u8, took, "\n");
39 const in_reader = in_buf.reader();
40
41 while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
42 if (line.len == 0 or line[0] == '#') continue; 37 if (line.len == 0 or line[0] == '#') continue;
43 const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; 38 const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
44 39
@@ -79,15 +74,20 @@ pub fn main() !void {
79 else => {}, 74 else => {},
80 } 75 }
81 } 76 }
77 } else |err| switch (err) {
78 error.EndOfStream => {},
79 else => {
80 return err;
81 },
82 } 82 }
83 83
84 var blocks_map = BlockMap.init(allocator); 84 var blocks_map = BlockMap.init(allocator);
85 defer blocks_map.deinit(); 85 defer blocks_map.deinit();
86 86
87 var stage1 = std.ArrayList(u16).init(allocator); 87 var stage1 = std.array_list.Managed(u16).init(allocator);
88 defer stage1.deinit(); 88 defer stage1.deinit();
89 89
90 var stage2 = std.ArrayList(u8).init(allocator); 90 var stage2 = std.array_list.Managed(u8).init(allocator);
91 defer stage2.deinit(); 91 defer stage2.deinit();
92 92
93 var block: Block = [_]u8{0} ** block_size; 93 var block: Block = [_]u8{0} ** block_size;
@@ -118,18 +118,17 @@ pub fn main() !void {
118 _ = args_iter.skip(); 118 _ = args_iter.skip();
119 const output_path = args_iter.next() orelse @panic("No output file arg!"); 119 const output_path = args_iter.next() orelse @panic("No output file arg!");
120 120
121 const compressor = std.compress.flate.deflate.compressor; 121 var write_buf: [4096]u8 = undefined;
122 var out_file = try std.fs.cwd().createFile(output_path, .{}); 122 var out_file = try std.fs.cwd().createFile(output_path, .{});
123 defer out_file.close(); 123 defer out_file.close();
124 var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); 124 var writer = out_file.writer(&write_buf);
125 const writer = out_comp.writer();
126 125
127 const endian = builtin.cpu.arch.endian(); 126 const endian = builtin.cpu.arch.endian();
128 try writer.writeInt(u16, @intCast(stage1.items.len), endian); 127 try writer.interface.writeInt(u16, @intCast(stage1.items.len), endian);
129 for (stage1.items) |i| try writer.writeInt(u16, i, endian); 128 for (stage1.items) |i| try writer.interface.writeInt(u16, i, endian);
130 129
131 try writer.writeInt(u16, @intCast(stage2.items.len), endian); 130 try writer.interface.writeInt(u16, @intCast(stage2.items.len), endian);
132 try writer.writeAll(stage2.items); 131 try writer.interface.writeAll(stage2.items);
133 132
134 try out_comp.flush(); 133 try writer.interface.flush();
135} 134}
diff --git a/codegen/ccc.zig b/codegen/ccc.zig
index a01c8d2..4e470ae 100644
--- a/codegen/ccc.zig
+++ b/codegen/ccc.zig
@@ -21,7 +21,7 @@ const BlockMap = std.HashMap(
21 std.hash_map.default_max_load_percentage, 21 std.hash_map.default_max_load_percentage,
22); 22);
23 23
24pub fn main() !void { 24pub fn main() anyerror!void {
25 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); 25 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
26 defer arena.deinit(); 26 defer arena.deinit();
27 const allocator = arena.allocator(); 27 const allocator = arena.allocator();
@@ -29,15 +29,10 @@ pub fn main() !void {
29 var flat_map = std.AutoHashMap(u21, u8).init(allocator); 29 var flat_map = std.AutoHashMap(u21, u8).init(allocator);
30 defer flat_map.deinit(); 30 defer flat_map.deinit();
31 31
32 var line_buf: [4096]u8 = undefined;
33
34 // Process DerivedCombiningClass.txt 32 // Process DerivedCombiningClass.txt
35 var cc_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedCombiningClass.txt", .{}); 33 var cc_reader = std.io.Reader.fixed(@embedFile("DerivedCombiningClass.txt"));
36 defer cc_file.close(); 34 while (cc_reader.takeDelimiterInclusive('\n')) |took| {
37 var cc_buf = std.io.bufferedReader(cc_file.reader()); 35 const line = std.mem.trimRight(u8, took, "\n");
38 const cc_reader = cc_buf.reader();
39
40 while (try cc_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
41 if (line.len == 0 or line[0] == '#') continue; 36 if (line.len == 0 or line[0] == '#') continue;
42 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; 37 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
43 38
@@ -68,15 +63,20 @@ pub fn main() !void {
68 else => {}, 63 else => {},
69 } 64 }
70 } 65 }
66 } else |err| switch (err) {
67 error.EndOfStream => {},
68 else => {
69 return err;
70 },
71 } 71 }
72 72
73 var blocks_map = BlockMap.init(allocator); 73 var blocks_map = BlockMap.init(allocator);
74 defer blocks_map.deinit(); 74 defer blocks_map.deinit();
75 75
76 var stage1 = std.ArrayList(u16).init(allocator); 76 var stage1 = std.array_list.Managed(u16).init(allocator);
77 defer stage1.deinit(); 77 defer stage1.deinit();
78 78
79 var stage2 = std.ArrayList(u8).init(allocator); 79 var stage2 = std.array_list.Managed(u8).init(allocator);
80 defer stage2.deinit(); 80 defer stage2.deinit();
81 81
82 var block: Block = [_]u8{0} ** block_size; 82 var block: Block = [_]u8{0} ** block_size;
@@ -107,18 +107,17 @@ pub fn main() !void {
107 _ = args_iter.skip(); 107 _ = args_iter.skip();
108 const output_path = args_iter.next() orelse @panic("No output file arg!"); 108 const output_path = args_iter.next() orelse @panic("No output file arg!");
109 109
110 const compressor = std.compress.flate.deflate.compressor; 110 var write_buf: [4096]u8 = undefined;
111 var out_file = try std.fs.cwd().createFile(output_path, .{}); 111 var out_file = try std.fs.cwd().createFile(output_path, .{});
112 defer out_file.close(); 112 defer out_file.close();
113 var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); 113 var writer = out_file.writer(&write_buf);
114 const writer = out_comp.writer();
115 114
116 const endian = builtin.cpu.arch.endian(); 115 const endian = builtin.cpu.arch.endian();
117 try writer.writeInt(u16, @intCast(stage1.items.len), endian); 116 try writer.interface.writeInt(u16, @intCast(stage1.items.len), endian);
118 for (stage1.items) |i| try writer.writeInt(u16, i, endian); 117 for (stage1.items) |i| try writer.interface.writeInt(u16, i, endian);
119 118
120 try writer.writeInt(u16, @intCast(stage2.items.len), endian); 119 try writer.interface.writeInt(u16, @intCast(stage2.items.len), endian);
121 try writer.writeAll(stage2.items); 120 try writer.interface.writeAll(stage2.items);
122 121
123 try out_comp.flush(); 122 try writer.interface.flush();
124} 123}
diff --git a/codegen/compat.zig b/codegen/compat.zig
index 07616fc..debb83d 100644
--- a/codegen/compat.zig
+++ b/codegen/compat.zig
@@ -1,32 +1,28 @@
1const std = @import("std"); 1const std = @import("std");
2const builtin = @import("builtin"); 2const builtin = @import("builtin");
3 3
4pub fn main() !void { 4pub fn main() anyerror!void {
5 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); 5 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
6 defer arena.deinit(); 6 defer arena.deinit();
7 const allocator = arena.allocator(); 7 const allocator = arena.allocator();
8 8
9 // Process UnicodeData.txt 9 // Process UnicodeData.txt
10 var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{}); 10 var write_buf: [4096]u8 = undefined;
11 defer in_file.close();
12 var in_buf = std.io.bufferedReader(in_file.reader());
13 const in_reader = in_buf.reader();
14 11
12 var in_reader = std.io.Reader.fixed(@embedFile("UnicodeData.txt"));
15 var args_iter = try std.process.argsWithAllocator(allocator); 13 var args_iter = try std.process.argsWithAllocator(allocator);
16 defer args_iter.deinit(); 14 defer args_iter.deinit();
17 _ = args_iter.skip(); 15 _ = args_iter.skip();
18 const output_path = args_iter.next() orelse @panic("No output file arg!"); 16 const output_path = args_iter.next() orelse @panic("No output file arg!");
19 17
20 const compressor = std.compress.flate.deflate.compressor;
21 var out_file = try std.fs.cwd().createFile(output_path, .{}); 18 var out_file = try std.fs.cwd().createFile(output_path, .{});
22 defer out_file.close(); 19 defer out_file.close();
23 var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); 20 var writer = out_file.writer(&write_buf);
24 const writer = out_comp.writer();
25 21
26 const endian = builtin.cpu.arch.endian(); 22 const endian = builtin.cpu.arch.endian();
27 var line_buf: [4096]u8 = undefined;
28 23
29 lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { 24 lines: while (in_reader.takeDelimiterInclusive('\n')) |took| {
25 const line = std.mem.trimRight(u8, took, "\n");
30 if (line.len == 0) continue; 26 if (line.len == 0) continue;
31 27
32 var field_iter = std.mem.splitScalar(u8, line, ';'); 28 var field_iter = std.mem.splitScalar(u8, line, ';');
@@ -55,10 +51,15 @@ pub fn main() !void {
55 } 51 }
56 } 52 }
57 53
58 try writer.writeInt(u8, @intCast(len), endian); 54 try writer.interface.writeInt(u8, @intCast(len), endian);
59 for (cps[0..len]) |cp| try writer.writeInt(u24, cp, endian); 55 for (cps[0..len]) |cp| try writer.interface.writeInt(u24, cp, endian);
56 } else |err| switch (err) {
57 error.EndOfStream => {},
58 else => {
59 return err;
60 },
60 } 61 }
61 62
62 try writer.writeInt(u16, 0, endian); 63 try writer.interface.writeInt(u16, 0, endian);
63 try out_comp.flush(); 64 try writer.interface.flush();
64} 65}
diff --git a/codegen/core_props.zig b/codegen/core_props.zig
index f60c7a9..6ffdf91 100644
--- a/codegen/core_props.zig
+++ b/codegen/core_props.zig
@@ -22,7 +22,7 @@ const BlockMap = std.HashMap(
22 std.hash_map.default_max_load_percentage, 22 std.hash_map.default_max_load_percentage,
23); 23);
24 24
25pub fn main() !void { 25pub fn main() anyerror!void {
26 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); 26 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
27 defer arena.deinit(); 27 defer arena.deinit();
28 const allocator = arena.allocator(); 28 const allocator = arena.allocator();
@@ -30,15 +30,10 @@ pub fn main() !void {
30 var flat_map = std.AutoHashMap(u21, u8).init(allocator); 30 var flat_map = std.AutoHashMap(u21, u8).init(allocator);
31 defer flat_map.deinit(); 31 defer flat_map.deinit();
32 32
33 var line_buf: [4096]u8 = undefined;
34
35 // Process DerivedCoreProperties.txt 33 // Process DerivedCoreProperties.txt
36 var in_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{}); 34 var in_reader = std.io.Reader.fixed(@embedFile("DerivedCoreProperties.txt"));
37 defer in_file.close(); 35 while (in_reader.takeDelimiterInclusive('\n')) |took| {
38 var in_buf = std.io.bufferedReader(in_file.reader()); 36 const line = std.mem.trimRight(u8, took, "\n");
39 const in_reader = in_buf.reader();
40
41 while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
42 if (line.len == 0 or line[0] == '#') continue; 37 if (line.len == 0 or line[0] == '#') continue;
43 const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; 38 const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
44 39
@@ -82,15 +77,19 @@ pub fn main() !void {
82 else => {}, 77 else => {},
83 } 78 }
84 } 79 }
80 } else |err| switch (err) {
81 error.EndOfStream => {},
82 else => {
83 return err;
84 },
85 } 85 }
86
87 var blocks_map = BlockMap.init(allocator); 86 var blocks_map = BlockMap.init(allocator);
88 defer blocks_map.deinit(); 87 defer blocks_map.deinit();
89 88
90 var stage1 = std.ArrayList(u16).init(allocator); 89 var stage1 = std.array_list.Managed(u16).init(allocator);
91 defer stage1.deinit(); 90 defer stage1.deinit();
92 91
93 var stage2 = std.ArrayList(u8).init(allocator); 92 var stage2 = std.array_list.Managed(u8).init(allocator);
94 defer stage2.deinit(); 93 defer stage2.deinit();
95 94
96 var block: Block = [_]u8{0} ** block_size; 95 var block: Block = [_]u8{0} ** block_size;
@@ -121,18 +120,17 @@ pub fn main() !void {
121 _ = args_iter.skip(); 120 _ = args_iter.skip();
122 const output_path = args_iter.next() orelse @panic("No output file arg!"); 121 const output_path = args_iter.next() orelse @panic("No output file arg!");
123 122
124 const compressor = std.compress.flate.deflate.compressor; 123 var out_buf: [4096]u8 = undefined;
125 var out_file = try std.fs.cwd().createFile(output_path, .{}); 124 var out_file = try std.fs.cwd().createFile(output_path, .{});
126 defer out_file.close(); 125 defer out_file.close();
127 var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); 126 var writer = out_file.writer(&out_buf);
128 const writer = out_comp.writer();
129 127
130 const endian = builtin.cpu.arch.endian(); 128 const endian = builtin.cpu.arch.endian();
131 try writer.writeInt(u16, @intCast(stage1.items.len), endian); 129 try writer.interface.writeInt(u16, @intCast(stage1.items.len), endian);
132 for (stage1.items) |i| try writer.writeInt(u16, i, endian); 130 for (stage1.items) |i| try writer.interface.writeInt(u16, i, endian);
133 131
134 try writer.writeInt(u16, @intCast(stage2.items.len), endian); 132 try writer.interface.writeInt(u16, @intCast(stage2.items.len), endian);
135 try writer.writeAll(stage2.items); 133 try writer.interface.writeAll(stage2.items);
136 134
137 try out_comp.flush(); 135 try writer.interface.flush();
138} 136}
diff --git a/codegen/dwp.zig b/codegen/dwp.zig
index 5e5bf6a..75ac68e 100644
--- a/codegen/dwp.zig
+++ b/codegen/dwp.zig
@@ -23,7 +23,7 @@ const BlockMap = std.HashMap(
23 std.hash_map.default_max_load_percentage, 23 std.hash_map.default_max_load_percentage,
24); 24);
25 25
26pub fn main() !void { 26pub fn main() anyerror!void {
27 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); 27 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
28 defer arena.deinit(); 28 defer arena.deinit();
29 const allocator = arena.allocator(); 29 const allocator = arena.allocator();
@@ -31,15 +31,11 @@ pub fn main() !void {
31 var flat_map = std.AutoHashMap(u21, i4).init(allocator); 31 var flat_map = std.AutoHashMap(u21, i4).init(allocator);
32 defer flat_map.deinit(); 32 defer flat_map.deinit();
33 33
34 var line_buf: [4096]u8 = undefined;
35
36 // Process DerivedEastAsianWidth.txt 34 // Process DerivedEastAsianWidth.txt
37 var deaw_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedEastAsianWidth.txt", .{}); 35 var deaw_reader = std.io.Reader.fixed(@embedFile("DerivedEastAsianWidth.txt"));
38 defer deaw_file.close();
39 var deaw_buf = std.io.bufferedReader(deaw_file.reader());
40 const deaw_reader = deaw_buf.reader();
41 36
42 while (try deaw_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { 37 while (deaw_reader.takeDelimiterInclusive('\n')) |took| {
38 const line = std.mem.trimRight(u8, took, "\n");
43 if (line.len == 0) continue; 39 if (line.len == 0) continue;
44 40
45 // @missing ranges 41 // @missing ranges
@@ -88,15 +84,17 @@ pub fn main() !void {
88 else => {}, 84 else => {},
89 } 85 }
90 } 86 }
87 } else |err| switch (err) {
88 error.EndOfStream => {},
89 else => {
90 return err;
91 },
91 } 92 }
92
93 // Process DerivedGeneralCategory.txt 93 // Process DerivedGeneralCategory.txt
94 var dgc_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedGeneralCategory.txt", .{}); 94 var dgc_reader = std.io.Reader.fixed(@embedFile("DerivedGeneralCategory.txt"));
95 defer dgc_file.close();
96 var dgc_buf = std.io.bufferedReader(dgc_file.reader());
97 const dgc_reader = dgc_buf.reader();
98 95
99 while (try dgc_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { 96 while (dgc_reader.takeDelimiterInclusive('\n')) |took| {
97 const line = std.mem.trimRight(u8, took, "\n");
100 if (line.len == 0 or line[0] == '#') continue; 98 if (line.len == 0 or line[0] == '#') continue;
101 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; 99 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
102 100
@@ -139,15 +137,20 @@ pub fn main() !void {
139 else => {}, 137 else => {},
140 } 138 }
141 } 139 }
140 } else |err| switch (err) {
141 error.EndOfStream => {},
142 else => {
143 return err;
144 },
142 } 145 }
143 146
144 var blocks_map = BlockMap.init(allocator); 147 var blocks_map = BlockMap.init(allocator);
145 defer blocks_map.deinit(); 148 defer blocks_map.deinit();
146 149
147 var stage1 = std.ArrayList(u16).init(allocator); 150 var stage1 = std.array_list.Managed(u16).init(allocator);
148 defer stage1.deinit(); 151 defer stage1.deinit();
149 152
150 var stage2 = std.ArrayList(i4).init(allocator); 153 var stage2 = std.array_list.Managed(i4).init(allocator);
151 defer stage2.deinit(); 154 defer stage2.deinit();
152 155
153 var block: Block = [_]i4{0} ** block_size; 156 var block: Block = [_]i4{0} ** block_size;
@@ -227,18 +230,17 @@ pub fn main() !void {
227 _ = args_iter.skip(); 230 _ = args_iter.skip();
228 const output_path = args_iter.next() orelse @panic("No output file arg!"); 231 const output_path = args_iter.next() orelse @panic("No output file arg!");
229 232
230 const compressor = std.compress.flate.deflate.compressor; 233 var write_buf: [4096]u8 = undefined;
231 var out_file = try std.fs.cwd().createFile(output_path, .{}); 234 var out_file = try std.fs.cwd().createFile(output_path, .{});
232 defer out_file.close(); 235 defer out_file.close();
233 var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); 236 var writer = out_file.writer(&write_buf);
234 const writer = out_comp.writer();
235 237
236 const endian = builtin.cpu.arch.endian(); 238 const endian = builtin.cpu.arch.endian();
237 try writer.writeInt(u16, @intCast(stage1.items.len), endian); 239 try writer.interface.writeInt(u16, @intCast(stage1.items.len), endian);
238 for (stage1.items) |i| try writer.writeInt(u16, i, endian); 240 for (stage1.items) |i| try writer.interface.writeInt(u16, i, endian);
239 241
240 try writer.writeInt(u16, @intCast(stage2.items.len), endian); 242 try writer.interface.writeInt(u16, @intCast(stage2.items.len), endian);
241 for (stage2.items) |i| try writer.writeInt(i8, i, endian); 243 for (stage2.items) |i| try writer.interface.writeInt(i8, i, endian);
242 244
243 try out_comp.flush(); 245 try writer.interface.flush();
244} 246}
diff --git a/codegen/fold.zig b/codegen/fold.zig
index cb73cca..366ed79 100644
--- a/codegen/fold.zig
+++ b/codegen/fold.zig
@@ -2,23 +2,19 @@ const std = @import("std");
2const builtin = @import("builtin"); 2const builtin = @import("builtin");
3const mem = std.mem; 3const mem = std.mem;
4 4
5pub fn main() !void { 5pub fn main() anyerror!void {
6 var gpa = std.heap.GeneralPurposeAllocator(.{}){}; 6 var gpa = std.heap.GeneralPurposeAllocator(.{}){};
7 defer std.debug.assert(gpa.deinit() == .ok); 7 defer std.debug.assert(gpa.deinit() == .ok);
8 const allocator = gpa.allocator(); 8 const allocator = gpa.allocator();
9 9
10 // Process DerivedCoreProperties.txt 10 // Process DerivedCoreProperties.txt
11 var props_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{}); 11 var props_reader = std.io.Reader.fixed(@embedFile("DerivedCoreProperties.txt"));
12 defer props_file.close();
13 var props_buf = std.io.bufferedReader(props_file.reader());
14 const props_reader = props_buf.reader();
15 12
16 var props_map = std.AutoHashMap(u21, void).init(allocator); 13 var props_map = std.AutoHashMap(u21, void).init(allocator);
17 defer props_map.deinit(); 14 defer props_map.deinit();
18 15
19 var line_buf: [4096]u8 = undefined; 16 props_lines: while (props_reader.takeDelimiterInclusive('\n')) |took| {
20 17 const line = std.mem.trimRight(u8, took, "\n");
21 props_lines: while (try props_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
22 if (line.len == 0 or line[0] == '#') continue; 18 if (line.len == 0 or line[0] == '#') continue;
23 19
24 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; 20 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
@@ -49,18 +45,20 @@ pub fn main() !void {
49 else => {}, 45 else => {},
50 } 46 }
51 } 47 }
48 } else |err| switch (err) {
49 error.EndOfStream => {},
50 else => {
51 return err;
52 },
52 } 53 }
53
54 var codepoint_mapping = std.AutoArrayHashMap(u21, [3]u21).init(allocator); 54 var codepoint_mapping = std.AutoArrayHashMap(u21, [3]u21).init(allocator);
55 defer codepoint_mapping.deinit(); 55 defer codepoint_mapping.deinit();
56 56
57 // Process CaseFolding.txt 57 // Process CaseFolding.txt
58 var cp_file = try std.fs.cwd().openFile("data/unicode/CaseFolding.txt", .{});
59 defer cp_file.close();
60 var cp_buf = std.io.bufferedReader(cp_file.reader());
61 const cp_reader = cp_buf.reader();
62 58
63 while (try cp_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { 59 var cp_reader = std.io.Reader.fixed(@embedFile("CaseFolding.txt"));
60 while (cp_reader.takeDelimiterInclusive('\n')) |took| {
61 const line = std.mem.trimRight(u8, took, "\n");
64 if (line.len == 0 or line[0] == '#') continue; 62 if (line.len == 0 or line[0] == '#') continue;
65 63
66 var field_it = std.mem.splitScalar(u8, line, ';'); 64 var field_it = std.mem.splitScalar(u8, line, ';');
@@ -81,9 +79,14 @@ pub fn main() !void {
81 } 79 }
82 80
83 try codepoint_mapping.putNoClobber(codepoint, mapping_buf); 81 try codepoint_mapping.putNoClobber(codepoint, mapping_buf);
82 } else |err| switch (err) {
83 error.EndOfStream => {},
84 else => {
85 return err;
86 },
84 } 87 }
85 88
86 var changes_when_casefolded_exceptions = std.ArrayList(u21).init(allocator); 89 var changes_when_casefolded_exceptions = std.array_list.Managed(u21).init(allocator);
87 defer changes_when_casefolded_exceptions.deinit(); 90 defer changes_when_casefolded_exceptions.deinit();
88 91
89 { 92 {
@@ -221,32 +224,31 @@ pub fn main() !void {
221 _ = args_iter.skip(); 224 _ = args_iter.skip();
222 const output_path = args_iter.next() orelse @panic("No output file arg!"); 225 const output_path = args_iter.next() orelse @panic("No output file arg!");
223 226
224 const compressor = std.compress.flate.deflate.compressor; 227 var write_buf: [4096]u8 = undefined;
225 var out_file = try std.fs.cwd().createFile(output_path, .{}); 228 var out_file = try std.fs.cwd().createFile(output_path, .{});
226 defer out_file.close(); 229 defer out_file.close();
227 var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); 230 var writer = out_file.writer(&write_buf);
228 const writer = out_comp.writer();
229 231
230 const endian = builtin.cpu.arch.endian(); 232 const endian = builtin.cpu.arch.endian();
231 // Table metadata. 233 // Table metadata.
232 try writer.writeInt(u24, @intCast(codepoint_cutoff), endian); 234 try writer.interface.writeInt(u24, @intCast(codepoint_cutoff), endian);
233 try writer.writeInt(u24, @intCast(multiple_codepoint_start), endian); 235 try writer.interface.writeInt(u24, @intCast(multiple_codepoint_start), endian);
234 // Stage 1 236 // Stage 1
235 try writer.writeInt(u16, @intCast(meaningful_stage1.len), endian); 237 try writer.interface.writeInt(u16, @intCast(meaningful_stage1.len), endian);
236 try writer.writeAll(meaningful_stage1); 238 try writer.interface.writeAll(meaningful_stage1);
237 // Stage 2 239 // Stage 2
238 try writer.writeInt(u16, @intCast(stage2.len), endian); 240 try writer.interface.writeInt(u16, @intCast(stage2.len), endian);
239 try writer.writeAll(stage2); 241 try writer.interface.writeAll(stage2);
240 // Stage 3 242 // Stage 3
241 try writer.writeInt(u16, @intCast(stage3.len), endian); 243 try writer.interface.writeInt(u16, @intCast(stage3.len), endian);
242 for (stage3) |offset| try writer.writeInt(i24, offset, endian); 244 for (stage3) |offset| try writer.interface.writeInt(i24, offset, endian);
243 // Changes when case folded 245 // Changes when case folded
244 // Min and max 246 // Min and max
245 try writer.writeInt(u24, std.mem.min(u21, changes_when_casefolded_exceptions.items), endian); 247 try writer.interface.writeInt(u24, std.mem.min(u21, changes_when_casefolded_exceptions.items), endian);
246 try writer.writeInt(u24, std.mem.max(u21, changes_when_casefolded_exceptions.items), endian); 248 try writer.interface.writeInt(u24, std.mem.max(u21, changes_when_casefolded_exceptions.items), endian);
247 try writer.writeInt(u16, @intCast(changes_when_casefolded_exceptions.items.len), endian); 249 try writer.interface.writeInt(u16, @intCast(changes_when_casefolded_exceptions.items.len), endian);
248 for (changes_when_casefolded_exceptions.items) |cp| try writer.writeInt(u24, cp, endian); 250 for (changes_when_casefolded_exceptions.items) |cp| try writer.interface.writeInt(u24, cp, endian);
249 251
250 try out_comp.flush(); 252 try writer.interface.flush();
251 } 253 }
252} 254}
diff --git a/codegen/gbp.zig b/codegen/gbp.zig
index 3fc4461..1d06e9a 100644
--- a/codegen/gbp.zig
+++ b/codegen/gbp.zig
@@ -47,7 +47,7 @@ const BlockMap = std.HashMap(
47 std.hash_map.default_max_load_percentage, 47 std.hash_map.default_max_load_percentage,
48); 48);
49 49
50pub fn main() !void { 50pub fn main() anyerror!void {
51 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); 51 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
52 defer arena.deinit(); 52 defer arena.deinit();
53 const allocator = arena.allocator(); 53 const allocator = arena.allocator();
@@ -61,15 +61,12 @@ pub fn main() !void {
61 var emoji_set = std.AutoHashMap(u21, void).init(allocator); 61 var emoji_set = std.AutoHashMap(u21, void).init(allocator);
62 defer emoji_set.deinit(); 62 defer emoji_set.deinit();
63 63
64 var line_buf: [4096]u8 = undefined;
65
66 // Process Indic 64 // Process Indic
67 var indic_file = try std.fs.cwd().openFile("data/unicode/DerivedCoreProperties.txt", .{}); 65 const indic_file = @embedFile("DerivedCoreProperties.txt");
68 defer indic_file.close(); 66 var indic_reader = std.io.Reader.fixed(indic_file);
69 var indic_buf = std.io.bufferedReader(indic_file.reader());
70 const indic_reader = indic_buf.reader();
71 67
72 while (try indic_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { 68 while (indic_reader.takeDelimiterInclusive('\n')) |took| {
69 const line = std.mem.trimRight(u8, took, "\n");
73 if (line.len == 0 or line[0] == '#') continue; 70 if (line.len == 0 or line[0] == '#') continue;
74 if (std.mem.indexOf(u8, line, "InCB") == null) continue; 71 if (std.mem.indexOf(u8, line, "InCB") == null) continue;
75 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; 72 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
@@ -100,15 +97,18 @@ pub fn main() !void {
100 else => {}, 97 else => {},
101 } 98 }
102 } 99 }
100 } else |err| switch (err) {
101 error.EndOfStream => {},
102 else => {
103 return err;
104 },
103 } 105 }
104
105 // Process GBP 106 // Process GBP
106 var gbp_file = try std.fs.cwd().openFile("data/unicode/auxiliary/GraphemeBreakProperty.txt", .{});
107 defer gbp_file.close();
108 var gbp_buf = std.io.bufferedReader(gbp_file.reader());
109 const gbp_reader = gbp_buf.reader();
110 107
111 while (try gbp_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { 108 var gbp_reader = std.io.Reader.fixed(@embedFile("GraphemeBreakProperty.txt"));
109
110 while (gbp_reader.takeDelimiterInclusive('\n')) |took| {
111 const line = std.mem.trimRight(u8, took, "\n");
112 if (line.len == 0 or line[0] == '#') continue; 112 if (line.len == 0 or line[0] == '#') continue;
113 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; 113 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
114 114
@@ -138,15 +138,18 @@ pub fn main() !void {
138 else => {}, 138 else => {},
139 } 139 }
140 } 140 }
141 } else |err| switch (err) {
142 error.EndOfStream => {},
143 else => {
144 return err;
145 },
141 } 146 }
142
143 // Process Emoji 147 // Process Emoji
144 var emoji_file = try std.fs.cwd().openFile("data/unicode/emoji/emoji-data.txt", .{});
145 defer emoji_file.close();
146 var emoji_buf = std.io.bufferedReader(emoji_file.reader());
147 const emoji_reader = emoji_buf.reader();
148 148
149 while (try emoji_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { 149 var emoji_reader = std.io.Reader.fixed(@embedFile("emoji-data.txt"));
150
151 while (emoji_reader.takeDelimiterInclusive('\n')) |took| {
152 const line = std.mem.trimRight(u8, took, "\n");
150 if (line.len == 0 or line[0] == '#') continue; 153 if (line.len == 0 or line[0] == '#') continue;
151 if (std.mem.indexOf(u8, line, "Extended_Pictographic") == null) continue; 154 if (std.mem.indexOf(u8, line, "Extended_Pictographic") == null) continue;
152 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; 155 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
@@ -170,15 +173,20 @@ pub fn main() !void {
170 else => {}, 173 else => {},
171 } 174 }
172 } 175 }
176 } else |err| switch (err) {
177 error.EndOfStream => {},
178 else => {
179 return err;
180 },
173 } 181 }
174 182
175 var blocks_map = BlockMap.init(allocator); 183 var blocks_map = BlockMap.init(allocator);
176 defer blocks_map.deinit(); 184 defer blocks_map.deinit();
177 185
178 var stage1 = std.ArrayList(u16).init(allocator); 186 var stage1 = std.array_list.Managed(u16).init(allocator);
179 defer stage1.deinit(); 187 defer stage1.deinit();
180 188
181 var stage2 = std.ArrayList(u16).init(allocator); 189 var stage2 = std.array_list.Managed(u16).init(allocator);
182 defer stage2.deinit(); 190 defer stage2.deinit();
183 191
184 var stage3 = std.AutoArrayHashMap(u8, u16).init(allocator); 192 var stage3 = std.AutoArrayHashMap(u8, u16).init(allocator);
@@ -227,22 +235,21 @@ pub fn main() !void {
227 _ = args_iter.skip(); 235 _ = args_iter.skip();
228 const output_path = args_iter.next() orelse @panic("No output file arg!"); 236 const output_path = args_iter.next() orelse @panic("No output file arg!");
229 237
230 const compressor = std.compress.flate.deflate.compressor; 238 var write_buf: [4096]u8 = undefined;
231 var out_file = try std.fs.cwd().createFile(output_path, .{}); 239 var out_file = try std.fs.cwd().createFile(output_path, .{});
232 defer out_file.close(); 240 defer out_file.close();
233 var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); 241 var writer = out_file.writer(&write_buf);
234 const writer = out_comp.writer();
235 242
236 const endian = builtin.cpu.arch.endian(); 243 const endian = builtin.cpu.arch.endian();
237 try writer.writeInt(u16, @intCast(stage1.items.len), endian); 244 try writer.interface.writeInt(u16, @intCast(stage1.items.len), endian);
238 for (stage1.items) |i| try writer.writeInt(u16, i, endian); 245 for (stage1.items) |i| try writer.interface.writeInt(u16, i, endian);
239 246
240 try writer.writeInt(u16, @intCast(stage2.items.len), endian); 247 try writer.interface.writeInt(u16, @intCast(stage2.items.len), endian);
241 for (stage2.items) |i| try writer.writeInt(u16, i, endian); 248 for (stage2.items) |i| try writer.interface.writeInt(u16, i, endian);
242 249
243 const props_bytes = stage3.keys(); 250 const props_bytes = stage3.keys();
244 try writer.writeInt(u16, @intCast(props_bytes.len), endian); 251 try writer.interface.writeInt(u16, @intCast(props_bytes.len), endian);
245 try writer.writeAll(props_bytes); 252 try writer.interface.writeAll(props_bytes);
246 253
247 try out_comp.flush(); 254 try writer.interface.flush();
248} 255}
diff --git a/codegen/gencat.zig b/codegen/gencat.zig
index fe06bd7..9800f1d 100644
--- a/codegen/gencat.zig
+++ b/codegen/gencat.zig
@@ -62,15 +62,10 @@ pub fn main() !void {
62 var flat_map = std.AutoHashMap(u21, u5).init(allocator); 62 var flat_map = std.AutoHashMap(u21, u5).init(allocator);
63 defer flat_map.deinit(); 63 defer flat_map.deinit();
64 64
65 var line_buf: [4096]u8 = undefined;
66
67 // Process DerivedGeneralCategory.txt 65 // Process DerivedGeneralCategory.txt
68 var in_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedGeneralCategory.txt", .{}); 66 var in_reader = std.io.Reader.fixed(@embedFile("DerivedGeneralCategory.txt"));
69 defer in_file.close(); 67 while (in_reader.takeDelimiterInclusive('\n')) |took| {
70 var in_buf = std.io.bufferedReader(in_file.reader()); 68 const line = std.mem.trimRight(u8, took, "\n");
71 const in_reader = in_buf.reader();
72
73 while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
74 if (line.len == 0 or line[0] == '#') continue; 69 if (line.len == 0 or line[0] == '#') continue;
75 70
76 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; 71 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
@@ -101,18 +96,23 @@ pub fn main() !void {
101 else => {}, 96 else => {},
102 } 97 }
103 } 98 }
99 } else |err| switch (err) {
100 error.EndOfStream => {},
101 else => {
102 return err;
103 },
104 } 104 }
105 105
106 var blocks_map = BlockMap.init(allocator); 106 var blocks_map = BlockMap.init(allocator);
107 defer blocks_map.deinit(); 107 defer blocks_map.deinit();
108 108
109 var stage1 = std.ArrayList(u16).init(allocator); 109 var stage1 = std.array_list.Managed(u16).init(allocator);
110 defer stage1.deinit(); 110 defer stage1.deinit();
111 111
112 var stage2 = std.ArrayList(u5).init(allocator); 112 var stage2 = std.array_list.Managed(u5).init(allocator);
113 defer stage2.deinit(); 113 defer stage2.deinit();
114 114
115 var stage3 = std.ArrayList(u5).init(allocator); 115 var stage3 = std.array_list.Managed(u5).init(allocator);
116 defer stage3.deinit(); 116 defer stage3.deinit();
117 117
118 var block: Block = [_]u5{0} ** block_size; 118 var block: Block = [_]u5{0} ** block_size;
@@ -151,21 +151,20 @@ pub fn main() !void {
151 _ = args_iter.skip(); 151 _ = args_iter.skip();
152 const output_path = args_iter.next() orelse @panic("No output file arg!"); 152 const output_path = args_iter.next() orelse @panic("No output file arg!");
153 153
154 const compressor = std.compress.flate.deflate.compressor; 154 var write_buf: [4096]u8 = undefined;
155 var out_file = try std.fs.cwd().createFile(output_path, .{}); 155 var out_file = try std.fs.cwd().createFile(output_path, .{});
156 defer out_file.close(); 156 defer out_file.close();
157 var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); 157 var writer = out_file.writer(&write_buf);
158 const writer = out_comp.writer();
159 158
160 const endian = builtin.cpu.arch.endian(); 159 const endian = builtin.cpu.arch.endian();
161 try writer.writeInt(u16, @intCast(stage1.items.len), endian); 160 try writer.interface.writeInt(u16, @intCast(stage1.items.len), endian);
162 for (stage1.items) |i| try writer.writeInt(u16, i, endian); 161 for (stage1.items) |i| try writer.interface.writeInt(u16, i, endian);
163 162
164 try writer.writeInt(u16, @intCast(stage2.items.len), endian); 163 try writer.interface.writeInt(u16, @intCast(stage2.items.len), endian);
165 for (stage2.items) |i| try writer.writeInt(u8, i, endian); 164 for (stage2.items) |i| try writer.interface.writeInt(u8, i, endian);
166 165
167 try writer.writeInt(u8, @intCast(stage3.items.len), endian); 166 try writer.interface.writeInt(u8, @intCast(stage3.items.len), endian);
168 for (stage3.items) |i| try writer.writeInt(u8, i, endian); 167 for (stage3.items) |i| try writer.interface.writeInt(u8, i, endian);
169 168
170 try out_comp.flush(); 169 try writer.interface.flush();
171} 170}
diff --git a/codegen/hangul.zig b/codegen/hangul.zig
index 2c42bb7..2e4c175 100644
--- a/codegen/hangul.zig
+++ b/codegen/hangul.zig
@@ -30,7 +30,7 @@ const BlockMap = std.HashMap(
30 std.hash_map.default_max_load_percentage, 30 std.hash_map.default_max_load_percentage,
31); 31);
32 32
33pub fn main() !void { 33pub fn main() anyerror!void {
34 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); 34 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
35 defer arena.deinit(); 35 defer arena.deinit();
36 const allocator = arena.allocator(); 36 const allocator = arena.allocator();
@@ -38,15 +38,10 @@ pub fn main() !void {
38 var flat_map = std.AutoHashMap(u21, u3).init(allocator); 38 var flat_map = std.AutoHashMap(u21, u3).init(allocator);
39 defer flat_map.deinit(); 39 defer flat_map.deinit();
40 40
41 var line_buf: [4096]u8 = undefined;
42
43 // Process HangulSyllableType.txt 41 // Process HangulSyllableType.txt
44 var in_file = try std.fs.cwd().openFile("data/unicode/HangulSyllableType.txt", .{}); 42 var in_reader = std.io.Reader.fixed(@embedFile("HangulSyllableType.txt"));
45 defer in_file.close(); 43 while (in_reader.takeDelimiterInclusive('\n')) |took| {
46 var in_buf = std.io.bufferedReader(in_file.reader()); 44 const line = std.mem.trimRight(u8, took, "\n");
47 const in_reader = in_buf.reader();
48
49 while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
50 if (line.len == 0 or line[0] == '#') continue; 45 if (line.len == 0 or line[0] == '#') continue;
51 46
52 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; 47 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
@@ -77,15 +72,19 @@ pub fn main() !void {
77 else => {}, 72 else => {},
78 } 73 }
79 } 74 }
75 } else |err| switch (err) {
76 error.EndOfStream => {},
77 else => {
78 return err;
79 },
80 } 80 }
81
82 var blocks_map = BlockMap.init(allocator); 81 var blocks_map = BlockMap.init(allocator);
83 defer blocks_map.deinit(); 82 defer blocks_map.deinit();
84 83
85 var stage1 = std.ArrayList(u16).init(allocator); 84 var stage1 = std.array_list.Managed(u16).init(allocator);
86 defer stage1.deinit(); 85 defer stage1.deinit();
87 86
88 var stage2 = std.ArrayList(u3).init(allocator); 87 var stage2 = std.array_list.Managed(u3).init(allocator);
89 defer stage2.deinit(); 88 defer stage2.deinit();
90 89
91 var block: Block = [_]u3{0} ** block_size; 90 var block: Block = [_]u3{0} ** block_size;
@@ -116,18 +115,17 @@ pub fn main() !void {
116 _ = args_iter.skip(); 115 _ = args_iter.skip();
117 const output_path = args_iter.next() orelse @panic("No output file arg!"); 116 const output_path = args_iter.next() orelse @panic("No output file arg!");
118 117
119 const compressor = std.compress.flate.deflate.compressor; 118 var write_buf: [4096]u8 = undefined;
120 var out_file = try std.fs.cwd().createFile(output_path, .{}); 119 var out_file = try std.fs.cwd().createFile(output_path, .{});
121 defer out_file.close(); 120 defer out_file.close();
122 var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); 121 var writer = out_file.writer(&write_buf);
123 const writer = out_comp.writer();
124 122
125 const endian = builtin.cpu.arch.endian(); 123 const endian = builtin.cpu.arch.endian();
126 try writer.writeInt(u16, @intCast(stage1.items.len), endian); 124 try writer.interface.writeInt(u16, @intCast(stage1.items.len), endian);
127 for (stage1.items) |i| try writer.writeInt(u16, i, endian); 125 for (stage1.items) |i| try writer.interface.writeInt(u16, i, endian);
128 126
129 try writer.writeInt(u16, @intCast(stage2.items.len), endian); 127 try writer.interface.writeInt(u16, @intCast(stage2.items.len), endian);
130 for (stage2.items) |i| try writer.writeInt(u8, i, endian); 128 for (stage2.items) |i| try writer.interface.writeInt(u8, i, endian);
131 129
132 try out_comp.flush(); 130 try writer.interface.flush();
133} 131}
diff --git a/codegen/lower.zig b/codegen/lower.zig
index a053fe3..91f3ef2 100644
--- a/codegen/lower.zig
+++ b/codegen/lower.zig
@@ -6,27 +6,22 @@ pub fn main() !void {
6 defer arena.deinit(); 6 defer arena.deinit();
7 const allocator = arena.allocator(); 7 const allocator = arena.allocator();
8 8
9 var write_buf: [4096]u8 = undefined;
9 // Process UnicodeData.txt 10 // Process UnicodeData.txt
10 var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{}); 11 var in_reader = std.io.Reader.fixed(@embedFile("UnicodeData.txt"));
11 defer in_file.close();
12 var in_buf = std.io.bufferedReader(in_file.reader());
13 const in_reader = in_buf.reader();
14
15 var args_iter = try std.process.argsWithAllocator(allocator); 12 var args_iter = try std.process.argsWithAllocator(allocator);
16 defer args_iter.deinit(); 13 defer args_iter.deinit();
17 _ = args_iter.skip(); 14 _ = args_iter.skip();
18 const output_path = args_iter.next() orelse @panic("No output file arg!"); 15 const output_path = args_iter.next() orelse @panic("No output file arg!");
19 16
20 const compressor = std.compress.flate.deflate.compressor;
21 var out_file = try std.fs.cwd().createFile(output_path, .{}); 17 var out_file = try std.fs.cwd().createFile(output_path, .{});
22 defer out_file.close(); 18 defer out_file.close();
23 var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); 19 var writer = out_file.writer(&write_buf);
24 const writer = out_comp.writer();
25 20
26 const endian = builtin.cpu.arch.endian(); 21 const endian = builtin.cpu.arch.endian();
27 var line_buf: [4096]u8 = undefined;
28 22
29 lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { 23 lines: while (in_reader.takeDelimiterInclusive('\n')) |took| {
24 const line = std.mem.trimRight(u8, took, "\n");
30 if (line.len == 0) continue; 25 if (line.len == 0) continue;
31 26
32 var field_iter = std.mem.splitScalar(u8, line, ';'); 27 var field_iter = std.mem.splitScalar(u8, line, ';');
@@ -42,16 +37,20 @@ pub fn main() !void {
42 13 => { 37 13 => {
43 // Simple lowercase mapping 38 // Simple lowercase mapping
44 if (field.len == 0) continue :lines; 39 if (field.len == 0) continue :lines;
45 try writer.writeInt(i24, cp, endian); 40 try writer.interface.writeInt(i24, cp, endian);
46 const mapping = try std.fmt.parseInt(i24, field, 16); 41 const mapping = try std.fmt.parseInt(i24, field, 16);
47 try writer.writeInt(i24, mapping - cp, endian); 42 try writer.interface.writeInt(i24, mapping - cp, endian);
48 }, 43 },
49 44
50 else => {}, 45 else => {},
51 } 46 }
52 } 47 }
48 } else |err| switch (err) {
49 error.EndOfStream => {},
50 else => {
51 return err;
52 },
53 } 53 }
54 54 try writer.interface.writeInt(u24, 0, endian);
55 try writer.writeInt(u24, 0, endian); 55 try writer.interface.flush();
56 try out_comp.flush();
57} 56}
diff --git a/codegen/normp.zig b/codegen/normp.zig
index 60dabdc..eaf6989 100644
--- a/codegen/normp.zig
+++ b/codegen/normp.zig
@@ -21,7 +21,7 @@ const BlockMap = std.HashMap(
21 std.hash_map.default_max_load_percentage, 21 std.hash_map.default_max_load_percentage,
22); 22);
23 23
24pub fn main() !void { 24pub fn main() anyerror!void {
25 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); 25 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
26 defer arena.deinit(); 26 defer arena.deinit();
27 const allocator = arena.allocator(); 27 const allocator = arena.allocator();
@@ -29,15 +29,10 @@ pub fn main() !void {
29 var flat_map = std.AutoHashMap(u21, u3).init(allocator); 29 var flat_map = std.AutoHashMap(u21, u3).init(allocator);
30 defer flat_map.deinit(); 30 defer flat_map.deinit();
31 31
32 var line_buf: [4096]u8 = undefined;
33
34 // Process DerivedNormalizationProps.txt 32 // Process DerivedNormalizationProps.txt
35 var in_file = try std.fs.cwd().openFile("data/unicode/DerivedNormalizationProps.txt", .{}); 33 var in_reader = std.io.Reader.fixed(@embedFile("DerivedNormalizationProps.txt"));
36 defer in_file.close(); 34 while (in_reader.takeDelimiterInclusive('\n')) |took| {
37 var in_buf = std.io.bufferedReader(in_file.reader()); 35 const line = std.mem.trimRight(u8, took, "\n");
38 const in_reader = in_buf.reader();
39
40 while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
41 if (line.len == 0 or line[0] == '#') continue; 36 if (line.len == 0 or line[0] == '#') continue;
42 37
43 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; 38 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
@@ -78,15 +73,19 @@ pub fn main() !void {
78 else => {}, 73 else => {},
79 } 74 }
80 } 75 }
76 } else |err| switch (err) {
77 error.EndOfStream => {},
78 else => {
79 return err;
80 },
81 } 81 }
82
83 var blocks_map = BlockMap.init(allocator); 82 var blocks_map = BlockMap.init(allocator);
84 defer blocks_map.deinit(); 83 defer blocks_map.deinit();
85 84
86 var stage1 = std.ArrayList(u16).init(allocator); 85 var stage1 = std.array_list.Managed(u16).init(allocator);
87 defer stage1.deinit(); 86 defer stage1.deinit();
88 87
89 var stage2 = std.ArrayList(u3).init(allocator); 88 var stage2 = std.array_list.Managed(u3).init(allocator);
90 defer stage2.deinit(); 89 defer stage2.deinit();
91 90
92 var block: Block = [_]u3{0} ** block_size; 91 var block: Block = [_]u3{0} ** block_size;
@@ -117,18 +116,17 @@ pub fn main() !void {
117 _ = args_iter.skip(); 116 _ = args_iter.skip();
118 const output_path = args_iter.next() orelse @panic("No output file arg!"); 117 const output_path = args_iter.next() orelse @panic("No output file arg!");
119 118
120 const compressor = std.compress.flate.deflate.compressor; 119 var write_buf: [4096]u8 = undefined;
121 var out_file = try std.fs.cwd().createFile(output_path, .{}); 120 var out_file = try std.fs.cwd().createFile(output_path, .{});
122 defer out_file.close(); 121 defer out_file.close();
123 var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); 122 var writer = out_file.writer(&write_buf);
124 const writer = out_comp.writer();
125 123
126 const endian = builtin.cpu.arch.endian(); 124 const endian = builtin.cpu.arch.endian();
127 try writer.writeInt(u16, @intCast(stage1.items.len), endian); 125 try writer.interface.writeInt(u16, @intCast(stage1.items.len), endian);
128 for (stage1.items) |i| try writer.writeInt(u16, i, endian); 126 for (stage1.items) |i| try writer.interface.writeInt(u16, i, endian);
129 127
130 try writer.writeInt(u16, @intCast(stage2.items.len), endian); 128 try writer.interface.writeInt(u16, @intCast(stage2.items.len), endian);
131 for (stage2.items) |i| try writer.writeInt(u8, i, endian); 129 for (stage2.items) |i| try writer.interface.writeInt(u8, i, endian);
132 130
133 try out_comp.flush(); 131 try writer.interface.flush();
134} 132}
diff --git a/codegen/numeric.zig b/codegen/numeric.zig
index 038ac0a..b304349 100644
--- a/codegen/numeric.zig
+++ b/codegen/numeric.zig
@@ -22,7 +22,7 @@ const BlockMap = std.HashMap(
22 std.hash_map.default_max_load_percentage, 22 std.hash_map.default_max_load_percentage,
23); 23);
24 24
25pub fn main() !void { 25pub fn main() anyerror!void {
26 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); 26 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
27 defer arena.deinit(); 27 defer arena.deinit();
28 const allocator = arena.allocator(); 28 const allocator = arena.allocator();
@@ -30,15 +30,10 @@ pub fn main() !void {
30 var flat_map = std.AutoHashMap(u21, u8).init(allocator); 30 var flat_map = std.AutoHashMap(u21, u8).init(allocator);
31 defer flat_map.deinit(); 31 defer flat_map.deinit();
32 32
33 var line_buf: [4096]u8 = undefined;
34
35 // Process DerivedNumericType.txt 33 // Process DerivedNumericType.txt
36 var in_file = try std.fs.cwd().openFile("data/unicode/extracted/DerivedNumericType.txt", .{}); 34 var in_reader = std.io.Reader.fixed(@embedFile("DerivedNumericType.txt"));
37 defer in_file.close(); 35 while (in_reader.takeDelimiterInclusive('\n')) |took| {
38 var in_buf = std.io.bufferedReader(in_file.reader()); 36 const line = std.mem.trimRight(u8, took, "\n");
39 const in_reader = in_buf.reader();
40
41 while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
42 if (line.len == 0 or line[0] == '#') continue; 37 if (line.len == 0 or line[0] == '#') continue;
43 const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; 38 const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
44 39
@@ -79,15 +74,20 @@ pub fn main() !void {
79 else => {}, 74 else => {},
80 } 75 }
81 } 76 }
77 } else |err| switch (err) {
78 error.EndOfStream => {},
79 else => {
80 return err;
81 },
82 } 82 }
83 83
84 var blocks_map = BlockMap.init(allocator); 84 var blocks_map = BlockMap.init(allocator);
85 defer blocks_map.deinit(); 85 defer blocks_map.deinit();
86 86
87 var stage1 = std.ArrayList(u16).init(allocator); 87 var stage1 = std.array_list.Managed(u16).init(allocator);
88 defer stage1.deinit(); 88 defer stage1.deinit();
89 89
90 var stage2 = std.ArrayList(u8).init(allocator); 90 var stage2 = std.array_list.Managed(u8).init(allocator);
91 defer stage2.deinit(); 91 defer stage2.deinit();
92 92
93 var block: Block = [_]u8{0} ** block_size; 93 var block: Block = [_]u8{0} ** block_size;
@@ -118,18 +118,17 @@ pub fn main() !void {
118 _ = args_iter.skip(); 118 _ = args_iter.skip();
119 const output_path = args_iter.next() orelse @panic("No output file arg!"); 119 const output_path = args_iter.next() orelse @panic("No output file arg!");
120 120
121 const compressor = std.compress.flate.deflate.compressor; 121 var write_buf: [4096]u8 = undefined;
122 var out_file = try std.fs.cwd().createFile(output_path, .{}); 122 var out_file = try std.fs.cwd().createFile(output_path, .{});
123 defer out_file.close(); 123 defer out_file.close();
124 var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); 124 var writer = out_file.writer(&write_buf);
125 const writer = out_comp.writer();
126 125
127 const endian = builtin.cpu.arch.endian(); 126 const endian = builtin.cpu.arch.endian();
128 try writer.writeInt(u16, @intCast(stage1.items.len), endian); 127 try writer.interface.writeInt(u16, @intCast(stage1.items.len), endian);
129 for (stage1.items) |i| try writer.writeInt(u16, i, endian); 128 for (stage1.items) |i| try writer.interface.writeInt(u16, i, endian);
130 129
131 try writer.writeInt(u16, @intCast(stage2.items.len), endian); 130 try writer.interface.writeInt(u16, @intCast(stage2.items.len), endian);
132 try writer.writeAll(stage2.items); 131 try writer.interface.writeAll(stage2.items);
133 132
134 try out_comp.flush(); 133 try writer.interface.flush();
135} 134}
diff --git a/codegen/props.zig b/codegen/props.zig
index 24b22e0..35c7dfb 100644
--- a/codegen/props.zig
+++ b/codegen/props.zig
@@ -22,7 +22,7 @@ const BlockMap = std.HashMap(
22 std.hash_map.default_max_load_percentage, 22 std.hash_map.default_max_load_percentage,
23); 23);
24 24
25pub fn main() !void { 25pub fn main() anyerror!void {
26 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); 26 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
27 defer arena.deinit(); 27 defer arena.deinit();
28 const allocator = arena.allocator(); 28 const allocator = arena.allocator();
@@ -30,15 +30,10 @@ pub fn main() !void {
30 var flat_map = std.AutoHashMap(u21, u8).init(allocator); 30 var flat_map = std.AutoHashMap(u21, u8).init(allocator);
31 defer flat_map.deinit(); 31 defer flat_map.deinit();
32 32
33 var line_buf: [4096]u8 = undefined;
34
35 // Process PropList.txt 33 // Process PropList.txt
36 var in_file = try std.fs.cwd().openFile("data/unicode/PropList.txt", .{}); 34 var in_reader = std.io.Reader.fixed(@embedFile("PropList.txt"));
37 defer in_file.close(); 35 while (in_reader.takeDelimiterInclusive('\n')) |took| {
38 var in_buf = std.io.bufferedReader(in_file.reader()); 36 const line = std.mem.trimRight(u8, took, "\n");
39 const in_reader = in_buf.reader();
40
41 while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
42 if (line.len == 0 or line[0] == '#') continue; 37 if (line.len == 0 or line[0] == '#') continue;
43 const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; 38 const no_comment = if (mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
44 39
@@ -79,15 +74,20 @@ pub fn main() !void {
79 else => {}, 74 else => {},
80 } 75 }
81 } 76 }
77 } else |err| switch (err) {
78 error.EndOfStream => {},
79 else => {
80 return err;
81 },
82 } 82 }
83 83
84 var blocks_map = BlockMap.init(allocator); 84 var blocks_map = BlockMap.init(allocator);
85 defer blocks_map.deinit(); 85 defer blocks_map.deinit();
86 86
87 var stage1 = std.ArrayList(u16).init(allocator); 87 var stage1 = std.array_list.Managed(u16).init(allocator);
88 defer stage1.deinit(); 88 defer stage1.deinit();
89 89
90 var stage2 = std.ArrayList(u8).init(allocator); 90 var stage2 = std.array_list.Managed(u8).init(allocator);
91 defer stage2.deinit(); 91 defer stage2.deinit();
92 92
93 var block: Block = [_]u8{0} ** block_size; 93 var block: Block = [_]u8{0} ** block_size;
@@ -118,18 +118,16 @@ pub fn main() !void {
118 _ = args_iter.skip(); 118 _ = args_iter.skip();
119 const output_path = args_iter.next() orelse @panic("No output file arg!"); 119 const output_path = args_iter.next() orelse @panic("No output file arg!");
120 120
121 const compressor = std.compress.flate.deflate.compressor; 121 var write_buf: [4096]u8 = undefined;
122 var out_file = try std.fs.cwd().createFile(output_path, .{}); 122 var out_file = try std.fs.cwd().createFile(output_path, .{});
123 defer out_file.close(); 123 defer out_file.close();
124 var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); 124 var writer = out_file.writer(&write_buf);
125 const writer = out_comp.writer();
126 125
127 const endian = builtin.cpu.arch.endian(); 126 const endian = builtin.cpu.arch.endian();
128 try writer.writeInt(u16, @intCast(stage1.items.len), endian); 127 try writer.interface.writeInt(u16, @intCast(stage1.items.len), endian);
129 for (stage1.items) |i| try writer.writeInt(u16, i, endian); 128 for (stage1.items) |i| try writer.interface.writeInt(u16, i, endian);
130
131 try writer.writeInt(u16, @intCast(stage2.items.len), endian);
132 try writer.writeAll(stage2.items);
133 129
134 try out_comp.flush(); 130 try writer.interface.writeInt(u16, @intCast(stage2.items.len), endian);
131 try writer.interface.writeAll(stage2.items);
132 try writer.interface.flush();
135} 133}
diff --git a/codegen/scripts.zig b/codegen/scripts.zig
index 530205d..0f0194c 100644
--- a/codegen/scripts.zig
+++ b/codegen/scripts.zig
@@ -195,7 +195,7 @@ const BlockMap = std.HashMap(
195 std.hash_map.default_max_load_percentage, 195 std.hash_map.default_max_load_percentage,
196); 196);
197 197
198pub fn main() !void { 198pub fn main() anyerror!void {
199 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); 199 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
200 defer arena.deinit(); 200 defer arena.deinit();
201 const allocator = arena.allocator(); 201 const allocator = arena.allocator();
@@ -203,15 +203,10 @@ pub fn main() !void {
203 var flat_map = std.AutoHashMap(u21, u8).init(allocator); 203 var flat_map = std.AutoHashMap(u21, u8).init(allocator);
204 defer flat_map.deinit(); 204 defer flat_map.deinit();
205 205
206 var line_buf: [4096]u8 = undefined; 206 // Process Scripts.txt
207 207 var in_reader = std.io.Reader.fixed(@embedFile("Scripts.txt"));
208 // Process DerivedGeneralCategory.txt 208 while (in_reader.takeDelimiterInclusive('\n')) |took| {
209 var in_file = try std.fs.cwd().openFile("data/unicode/Scripts.txt", .{}); 209 const line = std.mem.trimRight(u8, took, "\n");
210 defer in_file.close();
211 var in_buf = std.io.bufferedReader(in_file.reader());
212 const in_reader = in_buf.reader();
213
214 while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
215 if (line.len == 0 or line[0] == '#') continue; 210 if (line.len == 0 or line[0] == '#') continue;
216 211
217 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; 212 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
@@ -245,18 +240,22 @@ pub fn main() !void {
245 else => {}, 240 else => {},
246 } 241 }
247 } 242 }
243 } else |err| switch (err) {
244 error.EndOfStream => {},
245 else => {
246 return err;
247 },
248 } 248 }
249
250 var blocks_map = BlockMap.init(allocator); 249 var blocks_map = BlockMap.init(allocator);
251 defer blocks_map.deinit(); 250 defer blocks_map.deinit();
252 251
253 var stage1 = std.ArrayList(u16).init(allocator); 252 var stage1 = std.array_list.Managed(u16).init(allocator);
254 defer stage1.deinit(); 253 defer stage1.deinit();
255 254
256 var stage2 = std.ArrayList(u8).init(allocator); 255 var stage2 = std.array_list.Managed(u8).init(allocator);
257 defer stage2.deinit(); 256 defer stage2.deinit();
258 257
259 var stage3 = std.ArrayList(u8).init(allocator); 258 var stage3 = std.array_list.Managed(u8).init(allocator);
260 defer stage3.deinit(); 259 defer stage3.deinit();
261 260
262 var block: Block = [_]u8{0} ** block_size; 261 var block: Block = [_]u8{0} ** block_size;
@@ -295,21 +294,20 @@ pub fn main() !void {
295 _ = args_iter.skip(); 294 _ = args_iter.skip();
296 const output_path = args_iter.next() orelse @panic("No output file arg!"); 295 const output_path = args_iter.next() orelse @panic("No output file arg!");
297 296
298 const compressor = std.compress.flate.deflate.compressor; 297 var write_buf: [4096]u8 = undefined;
299 var out_file = try std.fs.cwd().createFile(output_path, .{}); 298 var out_file = try std.fs.cwd().createFile(output_path, .{});
300 defer out_file.close(); 299 defer out_file.close();
301 var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); 300 var writer = out_file.writer(&write_buf);
302 const writer = out_comp.writer();
303 301
304 const endian = builtin.cpu.arch.endian(); 302 const endian = builtin.cpu.arch.endian();
305 try writer.writeInt(u16, @intCast(stage1.items.len), endian); 303 try writer.interface.writeInt(u16, @intCast(stage1.items.len), endian);
306 for (stage1.items) |i| try writer.writeInt(u16, i, endian); 304 for (stage1.items) |i| try writer.interface.writeInt(u16, i, endian);
307 305
308 try writer.writeInt(u16, @intCast(stage2.items.len), endian); 306 try writer.interface.writeInt(u16, @intCast(stage2.items.len), endian);
309 for (stage2.items) |i| try writer.writeInt(u8, i, endian); 307 for (stage2.items) |i| try writer.interface.writeInt(u8, i, endian);
310 308
311 try writer.writeInt(u8, @intCast(stage3.items.len), endian); 309 try writer.interface.writeInt(u8, @intCast(stage3.items.len), endian);
312 for (stage3.items) |i| try writer.writeInt(u8, i, endian); 310 for (stage3.items) |i| try writer.interface.writeInt(u8, i, endian);
313 311
314 try out_comp.flush(); 312 try writer.interface.flush();
315} 313}
diff --git a/codegen/upper.zig b/codegen/upper.zig
index 5848911..5eb29e7 100644
--- a/codegen/upper.zig
+++ b/codegen/upper.zig
@@ -1,32 +1,26 @@
1const std = @import("std"); 1const std = @import("std");
2const builtin = @import("builtin"); 2const builtin = @import("builtin");
3 3
4pub fn main() !void { 4pub fn main() anyerror!void {
5 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); 5 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
6 defer arena.deinit(); 6 defer arena.deinit();
7 const allocator = arena.allocator(); 7 const allocator = arena.allocator();
8 8
9 var write_buf: [4096]u8 = undefined;
9 // Process UnicodeData.txt 10 // Process UnicodeData.txt
10 var in_file = try std.fs.cwd().openFile("data/unicode/UnicodeData.txt", .{}); 11 var in_reader = std.io.Reader.fixed(@embedFile("UnicodeData.txt"));
11 defer in_file.close();
12 var in_buf = std.io.bufferedReader(in_file.reader());
13 const in_reader = in_buf.reader();
14
15 var args_iter = try std.process.argsWithAllocator(allocator); 12 var args_iter = try std.process.argsWithAllocator(allocator);
16 defer args_iter.deinit(); 13 defer args_iter.deinit();
17 _ = args_iter.skip(); 14 _ = args_iter.skip();
18 const output_path = args_iter.next() orelse @panic("No output file arg!"); 15 const output_path = args_iter.next() orelse @panic("No output file arg!");
19 16
20 const compressor = std.compress.flate.deflate.compressor;
21 var out_file = try std.fs.cwd().createFile(output_path, .{}); 17 var out_file = try std.fs.cwd().createFile(output_path, .{});
22 defer out_file.close(); 18 defer out_file.close();
23 var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); 19 var file_writer = out_file.writer(&write_buf);
24 const writer = out_comp.writer();
25
26 const endian = builtin.cpu.arch.endian(); 20 const endian = builtin.cpu.arch.endian();
27 var line_buf: [4096]u8 = undefined;
28 21
29 lines: while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| { 22 lines: while (in_reader.takeDelimiterInclusive('\n')) |took| {
23 const line = std.mem.trimRight(u8, took, "\n");
30 if (line.len == 0) continue; 24 if (line.len == 0) continue;
31 25
32 var field_iter = std.mem.splitScalar(u8, line, ';'); 26 var field_iter = std.mem.splitScalar(u8, line, ';');
@@ -42,16 +36,21 @@ pub fn main() !void {
42 12 => { 36 12 => {
43 // Simple uppercase mapping 37 // Simple uppercase mapping
44 if (field.len == 0) continue :lines; 38 if (field.len == 0) continue :lines;
45 try writer.writeInt(i24, cp, endian); 39 try file_writer.interface.writeInt(i24, cp, endian);
46 const mapping = try std.fmt.parseInt(i24, field, 16); 40 const mapping = try std.fmt.parseInt(i24, field, 16);
47 try writer.writeInt(i24, mapping - cp, endian); 41 try file_writer.interface.writeInt(i24, mapping - cp, endian);
48 }, 42 },
49 43
50 else => {}, 44 else => {},
51 } 45 }
52 } 46 }
47 } else |err| switch (err) {
48 error.EndOfStream => {},
49 else => {
50 return err;
51 },
53 } 52 }
54 53
55 try writer.writeInt(u24, 0, endian); 54 try file_writer.interface.writeInt(u24, 0, endian);
56 try out_comp.flush(); 55 try file_writer.interface.flush();
57} 56}
diff --git a/codegen/wbp.zig b/codegen/wbp.zig
index 741103e..33eeea5 100644
--- a/codegen/wbp.zig
+++ b/codegen/wbp.zig
@@ -43,7 +43,7 @@ const BlockMap = std.HashMap(
43 std.hash_map.default_max_load_percentage, 43 std.hash_map.default_max_load_percentage,
44); 44);
45 45
46pub fn main() !void { 46pub fn main() anyerror!void {
47 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); 47 var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
48 defer arena.deinit(); 48 defer arena.deinit();
49 const allocator = arena.allocator(); 49 const allocator = arena.allocator();
@@ -51,15 +51,11 @@ pub fn main() !void {
51 var flat_map = std.AutoHashMap(u21, u5).init(allocator); 51 var flat_map = std.AutoHashMap(u21, u5).init(allocator);
52 defer flat_map.deinit(); 52 defer flat_map.deinit();
53 53
54 var line_buf: [4096]u8 = undefined;
55
56 // Process HangulSyllableType.txt 54 // Process HangulSyllableType.txt
57 var in_file = try std.fs.cwd().openFile("data/unicode/auxiliary/WordBreakProperty.txt", .{}); 55 const in_file = @embedFile("WordBreakProperty.txt");
58 defer in_file.close(); 56 var in_reader = std.io.Reader.fixed(in_file);
59 var in_buf = std.io.bufferedReader(in_file.reader()); 57 while (in_reader.takeDelimiterInclusive('\n')) |took| {
60 const in_reader = in_buf.reader(); 58 const line = std.mem.trimRight(u8, took, "\n");
61
62 while (try in_reader.readUntilDelimiterOrEof(&line_buf, '\n')) |line| {
63 if (line.len == 0 or line[0] == '#') continue; 59 if (line.len == 0 or line[0] == '#') continue;
64 60
65 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line; 61 const no_comment = if (std.mem.indexOfScalar(u8, line, '#')) |octo| line[0..octo] else line;
@@ -90,15 +86,19 @@ pub fn main() !void {
90 else => {}, 86 else => {},
91 } 87 }
92 } 88 }
89 } else |err| switch (err) {
90 error.EndOfStream => {},
91 else => {
92 return err;
93 },
93 } 94 }
94
95 var blocks_map = BlockMap.init(allocator); 95 var blocks_map = BlockMap.init(allocator);
96 defer blocks_map.deinit(); 96 defer blocks_map.deinit();
97 97
98 var stage1 = std.ArrayList(u16).init(allocator); 98 var stage1 = std.array_list.Managed(u16).init(allocator);
99 defer stage1.deinit(); 99 defer stage1.deinit();
100 100
101 var stage2 = std.ArrayList(u5).init(allocator); 101 var stage2 = std.array_list.Managed(u5).init(allocator);
102 defer stage2.deinit(); 102 defer stage2.deinit();
103 103
104 var block: Block = [_]u5{0} ** block_size; 104 var block: Block = [_]u5{0} ** block_size;
@@ -129,18 +129,17 @@ pub fn main() !void {
129 _ = args_iter.skip(); 129 _ = args_iter.skip();
130 const output_path = args_iter.next() orelse @panic("No output file arg!"); 130 const output_path = args_iter.next() orelse @panic("No output file arg!");
131 131
132 const compressor = std.compress.flate.deflate.compressor; 132 var write_buf: [4096]u8 = undefined;
133 var out_file = try std.fs.cwd().createFile(output_path, .{}); 133 var out_file = try std.fs.cwd().createFile(output_path, .{});
134 defer out_file.close(); 134 defer out_file.close();
135 var out_comp = try compressor(.raw, out_file.writer(), .{ .level = .best }); 135 var writer = out_file.writer(&write_buf);
136 const writer = out_comp.writer();
137 136
138 const endian = builtin.cpu.arch.endian(); 137 const endian = builtin.cpu.arch.endian();
139 try writer.writeInt(u16, @intCast(stage1.items.len), endian); 138 try writer.interface.writeInt(u16, @intCast(stage1.items.len), endian);
140 for (stage1.items) |i| try writer.writeInt(u16, i, endian); 139 for (stage1.items) |i| try writer.interface.writeInt(u16, i, endian);
141 140
142 try writer.writeInt(u16, @intCast(stage2.items.len), endian); 141 try writer.interface.writeInt(u16, @intCast(stage2.items.len), endian);
143 for (stage2.items) |i| try writer.writeInt(u8, i, endian); 142 for (stage2.items) |i| try writer.interface.writeInt(u8, i, endian);
144 143
145 try out_comp.flush(); 144 try writer.interface.flush();
146} 145}
diff --git a/src/CanonData.zig b/src/CanonData.zig
index 5d2332a..cf9dc8a 100644
--- a/src/CanonData.zig
+++ b/src/CanonData.zig
@@ -7,11 +7,9 @@ cps: []u21 = undefined,
7const CanonData = @This(); 7const CanonData = @This();
8 8
9pub fn init(allocator: mem.Allocator) !CanonData { 9pub fn init(allocator: mem.Allocator) !CanonData {
10 const decompressor = compress.flate.inflate.decompressor;
11 const in_bytes = @embedFile("canon"); 10 const in_bytes = @embedFile("canon");
12 var in_fbs = std.io.fixedBufferStream(in_bytes); 11 var in_fbs = std.io.fixedBufferStream(in_bytes);
13 var in_decomp = decompressor(.raw, in_fbs.reader()); 12 var reader = in_fbs.reader();
14 var reader = in_decomp.reader();
15 13
16 const endian = builtin.cpu.arch.endian(); 14 const endian = builtin.cpu.arch.endian();
17 var cdata = CanonData{ 15 var cdata = CanonData{
diff --git a/src/CaseFolding.zig b/src/CaseFolding.zig
index ff41b3e..df86b92 100644
--- a/src/CaseFolding.zig
+++ b/src/CaseFolding.zig
@@ -48,11 +48,9 @@ fn setupImpl(casefold: *CaseFolding, allocator: Allocator) Allocator.Error!void
48} 48}
49 49
50inline fn setupImplInner(casefold: *CaseFolding, allocator: Allocator) !void { 50inline fn setupImplInner(casefold: *CaseFolding, allocator: Allocator) !void {
51 const decompressor = compress.flate.inflate.decompressor;
52 const in_bytes = @embedFile("fold"); 51 const in_bytes = @embedFile("fold");
53 var in_fbs = std.io.fixedBufferStream(in_bytes); 52 var in_fbs = std.io.fixedBufferStream(in_bytes);
54 var in_decomp = decompressor(.raw, in_fbs.reader()); 53 var reader = in_fbs.reader();
55 var reader = in_decomp.reader();
56 54
57 const endian = builtin.cpu.arch.endian(); 55 const endian = builtin.cpu.arch.endian();
58 56
@@ -123,7 +121,7 @@ pub fn caseFoldAlloc(
123 allocator: Allocator, 121 allocator: Allocator,
124 cps: []const u21, 122 cps: []const u21,
125) Allocator.Error![]const u21 { 123) Allocator.Error![]const u21 {
126 var cfcps = std.ArrayList(u21).init(allocator); 124 var cfcps = std.array_list.Managed(u21).init(allocator);
127 defer cfcps.deinit(); 125 defer cfcps.deinit();
128 var buf: [3]u21 = undefined; 126 var buf: [3]u21 = undefined;
129 127
diff --git a/src/CombiningData.zig b/src/CombiningData.zig
index fd64a3b..f58e0de 100644
--- a/src/CombiningData.zig
+++ b/src/CombiningData.zig
@@ -6,11 +6,9 @@ s2: []u8 = undefined,
6const CombiningData = @This(); 6const CombiningData = @This();
7 7
8pub fn init(allocator: mem.Allocator) !CombiningData { 8pub fn init(allocator: mem.Allocator) !CombiningData {
9 const decompressor = compress.flate.inflate.decompressor;
10 const in_bytes = @embedFile("ccc"); 9 const in_bytes = @embedFile("ccc");
11 var in_fbs = std.io.fixedBufferStream(in_bytes); 10 var in_fbs = std.io.fixedBufferStream(in_bytes);
12 var in_decomp = decompressor(.raw, in_fbs.reader()); 11 var reader = in_fbs.reader();
13 var reader = in_decomp.reader();
14 12
15 const endian = builtin.cpu.arch.endian(); 13 const endian = builtin.cpu.arch.endian();
16 14
@@ -46,5 +44,4 @@ pub fn isStarter(cbdata: CombiningData, cp: u21) bool {
46 44
47const std = @import("std"); 45const std = @import("std");
48const builtin = @import("builtin"); 46const builtin = @import("builtin");
49const compress = std.compress;
50const mem = std.mem; 47const mem = std.mem;
diff --git a/src/CompatData.zig b/src/CompatData.zig
index 794abca..40ecd12 100644
--- a/src/CompatData.zig
+++ b/src/CompatData.zig
@@ -6,11 +6,9 @@ cps: []u21 = undefined,
6const CompatData = @This(); 6const CompatData = @This();
7 7
8pub fn init(allocator: mem.Allocator) !CompatData { 8pub fn init(allocator: mem.Allocator) !CompatData {
9 const decompressor = compress.flate.inflate.decompressor;
10 const in_bytes = @embedFile("compat"); 9 const in_bytes = @embedFile("compat");
11 var in_fbs = std.io.fixedBufferStream(in_bytes); 10 var in_fbs = std.io.fixedBufferStream(in_bytes);
12 var in_decomp = decompressor(.raw, in_fbs.reader()); 11 var reader = in_fbs.reader();
13 var reader = in_decomp.reader();
14 12
15 const endian = builtin.cpu.arch.endian(); 13 const endian = builtin.cpu.arch.endian();
16 var cpdata = CompatData{ 14 var cpdata = CompatData{
@@ -55,6 +53,5 @@ pub fn toNfkd(cpdata: *const CompatData, cp: u21) []u21 {
55 53
56const std = @import("std"); 54const std = @import("std");
57const builtin = @import("builtin"); 55const builtin = @import("builtin");
58const compress = std.compress;
59const mem = std.mem; 56const mem = std.mem;
60const magic = @import("magic"); 57const magic = @import("magic");
diff --git a/src/DisplayWidth.zig b/src/DisplayWidth.zig
index 629087b..dee7ebd 100644
--- a/src/DisplayWidth.zig
+++ b/src/DisplayWidth.zig
@@ -39,11 +39,9 @@ pub fn setupWithGraphemes(dw: *DisplayWidth, allocator: Allocator, graphemes: Gr
39 39
40// Sets up the DisplayWidthData, leaving the GraphemeData undefined. 40// Sets up the DisplayWidthData, leaving the GraphemeData undefined.
41pub fn setup(dw: *DisplayWidth, allocator: Allocator) Allocator.Error!void { 41pub fn setup(dw: *DisplayWidth, allocator: Allocator) Allocator.Error!void {
42 const decompressor = compress.flate.inflate.decompressor;
43 const in_bytes = @embedFile("dwp"); 42 const in_bytes = @embedFile("dwp");
44 var in_fbs = std.io.fixedBufferStream(in_bytes); 43 var in_fbs = std.io.fixedBufferStream(in_bytes);
45 var in_decomp = decompressor(.raw, in_fbs.reader()); 44 var reader = in_fbs.reader();
46 var reader = in_decomp.reader();
47 45
48 const endian = builtin.cpu.arch.endian(); 46 const endian = builtin.cpu.arch.endian();
49 47
@@ -118,6 +116,8 @@ pub fn graphemeClusterWidth(dw: DisplayWidth, gc: []const u8) isize {
118 // emoji text sequence. 116 // emoji text sequence.
119 if (ncp.code == 0xFE0E) w = 1; 117 if (ncp.code == 0xFE0E) w = 1;
120 if (ncp.code == 0xFE0F) w = 2; 118 if (ncp.code == 0xFE0F) w = 2;
119 // Skin tones
120 if (0x1F3FB <= ncp.code and ncp.code <= 0x1F3FF) w = 2;
121 } 121 }
122 122
123 // Only adding width of first non-zero-width code point. 123 // Only adding width of first non-zero-width code point.
@@ -207,6 +207,9 @@ test "strWidth" {
207 try testing.expectEqual(@as(usize, 9), dw.strWidth("Ẓ̌á̲l͔̝̞̄̑͌g̖̘̘̔̔͢͞͝o̪̔T̢̙̫̈̍͞e̬͈͕͌̏͑x̺̍ṭ̓̓ͅ")); 207 try testing.expectEqual(@as(usize, 9), dw.strWidth("Ẓ̌á̲l͔̝̞̄̑͌g̖̘̘̔̔͢͞͝o̪̔T̢̙̫̈̍͞e̬͈͕͌̏͑x̺̍ṭ̓̓ͅ"));
208 try testing.expectEqual(@as(usize, 17), dw.strWidth("슬라바 우크라이나")); 208 try testing.expectEqual(@as(usize, 17), dw.strWidth("슬라바 우크라이나"));
209 try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{378}")); 209 try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{378}"));
210
211 // https://codeberg.org/atman/zg/issues/82
212 try testing.expectEqual(@as(usize, 12), dw.strWidth("✍️✍🏻✍🏼✍🏽✍🏾✍🏿"));
210} 213}
211 214
212/// centers `str` in a new string of width `total_width` (in display cells) using `pad` as padding. 215/// centers `str` in a new string of width `total_width` (in display cells) using `pad` as padding.
@@ -404,7 +407,7 @@ pub fn wrap(
404 columns: usize, 407 columns: usize,
405 threshold: usize, 408 threshold: usize,
406) ![]u8 { 409) ![]u8 {
407 var result = ArrayList(u8).init(allocator); 410 var result = std.array_list.Managed(u8).init(allocator);
408 defer result.deinit(); 411 defer result.deinit();
409 412
410 var line_iter = mem.tokenizeAny(u8, str, "\r\n"); 413 var line_iter = mem.tokenizeAny(u8, str, "\r\n");
@@ -426,8 +429,10 @@ pub fn wrap(
426 } 429 }
427 430
428 // Remove trailing space and newline. 431 // Remove trailing space and newline.
429 _ = result.pop(); 432 if (result.items[result.items.len - 1] == '\n')
430 _ = result.pop(); 433 _ = result.pop();
434 if (result.items[result.items.len - 1] == ' ')
435 _ = result.pop();
431 436
432 return try result.toOwnedSlice(); 437 return try result.toOwnedSlice();
433} 438}
@@ -444,6 +449,18 @@ test "wrap" {
444 try testing.expectEqualStrings(want, got); 449 try testing.expectEqualStrings(want, got);
445} 450}
446 451
452test "zg/74" {
453 var debug_alloc = std.heap.DebugAllocator(.{}).init;
454 const allocator = debug_alloc.allocator();
455 defer _ = debug_alloc.deinit();
456 const dw = try DisplayWidth.init(allocator);
457 defer dw.deinit(allocator);
458 const wrapped = try dw.wrap(allocator, "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam pellentesque pulvinar felis, sit amet commodo ligula feugiat sed. Sed quis malesuada elit, nec eleifend lectus. Sed tincidunt finibus aliquet. Praesent consectetur nibh libero, tempus imperdiet lorem congue eget.", 16, 1);
459 defer allocator.free(wrapped);
460 const expected_wrap = "Lorem ipsum dolor \nsit amet, consectetur \nadipiscing elit. \nNullam pellentesque \npulvinar felis, \nsit amet commodo \nligula feugiat \nsed. Sed quis malesuada \nelit, nec eleifend \nlectus. Sed tincidunt \nfinibus aliquet. \nPraesent consectetur \nnibh libero, tempus \nimperdiet lorem \ncongue eget.";
461 try std.testing.expectEqualStrings(expected_wrap, wrapped);
462}
463
447fn testAllocation(allocator: Allocator) !void { 464fn testAllocation(allocator: Allocator) !void {
448 { 465 {
449 var dw = try DisplayWidth.init(allocator); 466 var dw = try DisplayWidth.init(allocator);
@@ -464,8 +481,6 @@ test "allocation test" {
464const std = @import("std"); 481const std = @import("std");
465const builtin = @import("builtin"); 482const builtin = @import("builtin");
466const options = @import("options"); 483const options = @import("options");
467const ArrayList = std.ArrayList;
468const compress = std.compress;
469const mem = std.mem; 484const mem = std.mem;
470const Allocator = mem.Allocator; 485const Allocator = mem.Allocator;
471const simd = std.simd; 486const simd = std.simd;
diff --git a/src/GeneralCategories.zig b/src/GeneralCategories.zig
index 8c1b6a3..eee7e56 100644
--- a/src/GeneralCategories.zig
+++ b/src/GeneralCategories.zig
@@ -47,11 +47,9 @@ pub fn init(allocator: Allocator) Allocator.Error!GeneralCategories {
47} 47}
48 48
49pub fn setup(gencat: *GeneralCategories, allocator: Allocator) Allocator.Error!void { 49pub fn setup(gencat: *GeneralCategories, allocator: Allocator) Allocator.Error!void {
50 const decompressor = compress.flate.inflate.decompressor;
51 const in_bytes = @embedFile("gencat"); 50 const in_bytes = @embedFile("gencat");
52 var in_fbs = std.io.fixedBufferStream(in_bytes); 51 var in_fbs = std.io.fixedBufferStream(in_bytes);
53 var in_decomp = decompressor(.raw, in_fbs.reader()); 52 var reader = in_fbs.reader();
54 var reader = in_decomp.reader();
55 53
56 const endian = builtin.cpu.arch.endian(); 54 const endian = builtin.cpu.arch.endian();
57 55
diff --git a/src/Graphemes.zig b/src/Graphemes.zig
index f1c56ed..81d874c 100644
--- a/src/Graphemes.zig
+++ b/src/Graphemes.zig
@@ -16,11 +16,9 @@ pub fn init(allocator: Allocator) Allocator.Error!Graphemes {
16} 16}
17 17
18pub fn setup(graphemes: *Graphemes, allocator: Allocator) Allocator.Error!void { 18pub fn setup(graphemes: *Graphemes, allocator: Allocator) Allocator.Error!void {
19 const decompressor = compress.flate.inflate.decompressor;
20 const in_bytes = @embedFile("gbp"); 19 const in_bytes = @embedFile("gbp");
21 var in_fbs = std.io.fixedBufferStream(in_bytes); 20 var in_fbs = std.io.fixedBufferStream(in_bytes);
22 var in_decomp = decompressor(.raw, in_fbs.reader()); 21 var reader = in_fbs.reader();
23 var reader = in_decomp.reader();
24 22
25 const endian = builtin.cpu.arch.endian(); 23 const endian = builtin.cpu.arch.endian();
26 24
diff --git a/src/HangulData.zig b/src/HangulData.zig
index 8c5f3ad..cae8b97 100644
--- a/src/HangulData.zig
+++ b/src/HangulData.zig
@@ -15,11 +15,9 @@ s2: []u3 = undefined,
15const Hangul = @This(); 15const Hangul = @This();
16 16
17pub fn init(allocator: mem.Allocator) !Hangul { 17pub fn init(allocator: mem.Allocator) !Hangul {
18 const decompressor = compress.flate.inflate.decompressor;
19 const in_bytes = @embedFile("hangul"); 18 const in_bytes = @embedFile("hangul");
20 var in_fbs = std.io.fixedBufferStream(in_bytes); 19 var in_fbs = std.io.fixedBufferStream(in_bytes);
21 var in_decomp = decompressor(.raw, in_fbs.reader()); 20 var reader = in_fbs.reader();
22 var reader = in_decomp.reader();
23 21
24 const endian = builtin.cpu.arch.endian(); 22 const endian = builtin.cpu.arch.endian();
25 var hangul = Hangul{}; 23 var hangul = Hangul{};
@@ -49,6 +47,5 @@ pub fn syllable(hangul: *const Hangul, cp: u21) Syllable {
49 47
50const std = @import("std"); 48const std = @import("std");
51const builtin = @import("builtin"); 49const builtin = @import("builtin");
52const compress = std.compress;
53const mem = std.mem; 50const mem = std.mem;
54const testing = std.testing; 51const testing = std.testing;
diff --git a/src/LetterCasing.zig b/src/LetterCasing.zig
index 11a3e96..33096fc 100644
--- a/src/LetterCasing.zig
+++ b/src/LetterCasing.zig
@@ -22,7 +22,6 @@ pub fn setup(case: *LetterCasing, allocator: Allocator) Allocator.Error!void {
22} 22}
23 23
24inline fn setupInner(self: *LetterCasing, allocator: mem.Allocator) !void { 24inline fn setupInner(self: *LetterCasing, allocator: mem.Allocator) !void {
25 const decompressor = compress.flate.inflate.decompressor;
26 const endian = builtin.cpu.arch.endian(); 25 const endian = builtin.cpu.arch.endian();
27 26
28 self.case_map = try allocator.alloc([2]u21, 0x110000); 27 self.case_map = try allocator.alloc([2]u21, 0x110000);
@@ -36,8 +35,7 @@ inline fn setupInner(self: *LetterCasing, allocator: mem.Allocator) !void {
36 // Uppercase 35 // Uppercase
37 const upper_bytes = @embedFile("upper"); 36 const upper_bytes = @embedFile("upper");
38 var upper_fbs = std.io.fixedBufferStream(upper_bytes); 37 var upper_fbs = std.io.fixedBufferStream(upper_bytes);
39 var upper_decomp = decompressor(.raw, upper_fbs.reader()); 38 var upper_reader = upper_fbs.reader();
40 var upper_reader = upper_decomp.reader();
41 39
42 while (true) { 40 while (true) {
43 const cp = try upper_reader.readInt(i24, endian); 41 const cp = try upper_reader.readInt(i24, endian);
@@ -49,8 +47,7 @@ inline fn setupInner(self: *LetterCasing, allocator: mem.Allocator) !void {
49 // Lowercase 47 // Lowercase
50 const lower_bytes = @embedFile("lower"); 48 const lower_bytes = @embedFile("lower");
51 var lower_fbs = std.io.fixedBufferStream(lower_bytes); 49 var lower_fbs = std.io.fixedBufferStream(lower_bytes);
52 var lower_decomp = decompressor(.raw, lower_fbs.reader()); 50 var lower_reader = lower_fbs.reader();
53 var lower_reader = lower_decomp.reader();
54 51
55 while (true) { 52 while (true) {
56 const cp = try lower_reader.readInt(i24, endian); 53 const cp = try lower_reader.readInt(i24, endian);
@@ -62,8 +59,7 @@ inline fn setupInner(self: *LetterCasing, allocator: mem.Allocator) !void {
62 // Case properties 59 // Case properties
63 const cp_bytes = @embedFile("case_prop"); 60 const cp_bytes = @embedFile("case_prop");
64 var cp_fbs = std.io.fixedBufferStream(cp_bytes); 61 var cp_fbs = std.io.fixedBufferStream(cp_bytes);
65 var cp_decomp = decompressor(.raw, cp_fbs.reader()); 62 var cp_reader = cp_fbs.reader();
66 var cp_reader = cp_decomp.reader();
67 63
68 const stage_1_len: u16 = try cp_reader.readInt(u16, endian); 64 const stage_1_len: u16 = try cp_reader.readInt(u16, endian);
69 self.prop_s1 = try allocator.alloc(u16, stage_1_len); 65 self.prop_s1 = try allocator.alloc(u16, stage_1_len);
@@ -122,7 +118,7 @@ pub fn toUpperStr(
122 allocator: mem.Allocator, 118 allocator: mem.Allocator,
123 str: []const u8, 119 str: []const u8,
124) ![]u8 { 120) ![]u8 {
125 var bytes = std.ArrayList(u8).init(allocator); 121 var bytes = std.array_list.Managed(u8).init(allocator);
126 defer bytes.deinit(); 122 defer bytes.deinit();
127 123
128 var iter = CodePointIterator{ .bytes = str }; 124 var iter = CodePointIterator{ .bytes = str };
@@ -180,7 +176,7 @@ pub fn toLowerStr(
180 allocator: mem.Allocator, 176 allocator: mem.Allocator,
181 str: []const u8, 177 str: []const u8,
182) ![]u8 { 178) ![]u8 {
183 var bytes = std.ArrayList(u8).init(allocator); 179 var bytes = std.array_list.Managed(u8).init(allocator);
184 defer bytes.deinit(); 180 defer bytes.deinit();
185 181
186 var iter = CodePointIterator{ .bytes = str }; 182 var iter = CodePointIterator{ .bytes = str };
diff --git a/src/NormPropsData.zig b/src/NormPropsData.zig
index ca69569..7b53542 100644
--- a/src/NormPropsData.zig
+++ b/src/NormPropsData.zig
@@ -6,11 +6,9 @@ s2: []u4 = undefined,
6const NormProps = @This(); 6const NormProps = @This();
7 7
8pub fn init(allocator: mem.Allocator) !NormProps { 8pub fn init(allocator: mem.Allocator) !NormProps {
9 const decompressor = compress.flate.inflate.decompressor;
10 const in_bytes = @embedFile("normp"); 9 const in_bytes = @embedFile("normp");
11 var in_fbs = std.io.fixedBufferStream(in_bytes); 10 var in_fbs = std.io.fixedBufferStream(in_bytes);
12 var in_decomp = decompressor(.raw, in_fbs.reader()); 11 var reader = in_fbs.reader();
13 var reader = in_decomp.reader();
14 12
15 const endian = builtin.cpu.arch.endian(); 13 const endian = builtin.cpu.arch.endian();
16 var norms = NormProps{}; 14 var norms = NormProps{};
@@ -50,6 +48,5 @@ pub fn isFcx(norms: *const NormProps, cp: u21) bool {
50 48
51const std = @import("std"); 49const std = @import("std");
52const builtin = @import("builtin"); 50const builtin = @import("builtin");
53const compress = std.compress;
54const mem = std.mem; 51const mem = std.mem;
55const testing = std.testing; 52const testing = std.testing;
diff --git a/src/Normalize.zig b/src/Normalize.zig
index 989ec29..4a1bae8 100644
--- a/src/Normalize.zig
+++ b/src/Normalize.zig
@@ -305,7 +305,7 @@ pub fn nfkd(self: Normalize, allocator: Allocator, str: []const u8) Allocator.Er
305} 305}
306 306
307pub fn nfxdCodePoints(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allocator.Error![]u21 { 307pub fn nfxdCodePoints(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allocator.Error![]u21 {
308 var dcp_list = std.ArrayList(u21).init(allocator); 308 var dcp_list = std.array_list.Managed(u21).init(allocator);
309 defer dcp_list.deinit(); 309 defer dcp_list.deinit();
310 310
311 var cp_iter = CodePointIterator{ .bytes = str }; 311 var cp_iter = CodePointIterator{ .bytes = str };
@@ -332,7 +332,7 @@ fn nfxd(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allo
332 const dcps = try self.nfxdCodePoints(allocator, str, form); 332 const dcps = try self.nfxdCodePoints(allocator, str, form);
333 defer allocator.free(dcps); 333 defer allocator.free(dcps);
334 334
335 var dstr_list = std.ArrayList(u8).init(allocator); 335 var dstr_list = std.array_list.Managed(u8).init(allocator);
336 defer dstr_list.deinit(); 336 defer dstr_list.deinit();
337 var buf: [4]u8 = undefined; 337 var buf: [4]u8 = undefined;
338 338
@@ -393,7 +393,7 @@ pub fn nfdCodePoints(
393 allocator: Allocator, 393 allocator: Allocator,
394 cps: []const u21, 394 cps: []const u21,
395) Allocator.Error![]u21 { 395) Allocator.Error![]u21 {
396 var dcp_list = std.ArrayList(u21).init(allocator); 396 var dcp_list = std.array_list.Managed(u21).init(allocator);
397 defer dcp_list.deinit(); 397 defer dcp_list.deinit();
398 398
399 var dc_buf: [18]u21 = undefined; 399 var dc_buf: [18]u21 = undefined;
@@ -418,7 +418,7 @@ pub fn nfkdCodePoints(
418 allocator: Allocator, 418 allocator: Allocator,
419 cps: []const u21, 419 cps: []const u21,
420) Allocator.Error![]u21 { 420) Allocator.Error![]u21 {
421 var dcp_list = std.ArrayList(u21).init(allocator); 421 var dcp_list = std.array_list.Managed(u21).init(allocator);
422 defer dcp_list.deinit(); 422 defer dcp_list.deinit();
423 423
424 var dc_buf: [18]u21 = undefined; 424 var dc_buf: [18]u21 = undefined;
@@ -560,7 +560,7 @@ fn nfxc(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allo
560 // If we have no deletions. the code point sequence 560 // If we have no deletions. the code point sequence
561 // has been fully composed. 561 // has been fully composed.
562 if (deleted == 0) { 562 if (deleted == 0) {
563 var cstr_list = std.ArrayList(u8).init(allocator); 563 var cstr_list = std.array_list.Managed(u8).init(allocator);
564 defer cstr_list.deinit(); 564 defer cstr_list.deinit();
565 var buf: [4]u8 = undefined; 565 var buf: [4]u8 = undefined;
566 566
diff --git a/src/Properties.zig b/src/Properties.zig
index 73602a0..432d176 100644
--- a/src/Properties.zig
+++ b/src/Properties.zig
@@ -25,14 +25,12 @@ pub fn setup(props: *Properties, allocator: Allocator) Allocator.Error!void {
25} 25}
26 26
27inline fn setupInner(props: *Properties, allocator: Allocator) !void { 27inline fn setupInner(props: *Properties, allocator: Allocator) !void {
28 const decompressor = compress.flate.inflate.decompressor;
29 const endian = builtin.cpu.arch.endian(); 28 const endian = builtin.cpu.arch.endian();
30 29
31 // Process DerivedCoreProperties.txt 30 // Process DerivedCoreProperties.txt
32 const core_bytes = @embedFile("core_props"); 31 const core_bytes = @embedFile("core_props");
33 var core_fbs = std.io.fixedBufferStream(core_bytes); 32 var core_fbs = std.io.fixedBufferStream(core_bytes);
34 var core_decomp = decompressor(.raw, core_fbs.reader()); 33 var core_reader = core_fbs.reader();
35 var core_reader = core_decomp.reader();
36 34
37 const core_stage_1_len: u16 = try core_reader.readInt(u16, endian); 35 const core_stage_1_len: u16 = try core_reader.readInt(u16, endian);
38 props.core_s1 = try allocator.alloc(u16, core_stage_1_len); 36 props.core_s1 = try allocator.alloc(u16, core_stage_1_len);
@@ -47,8 +45,7 @@ inline fn setupInner(props: *Properties, allocator: Allocator) !void {
47 // Process PropList.txt 45 // Process PropList.txt
48 const props_bytes = @embedFile("props"); 46 const props_bytes = @embedFile("props");
49 var props_fbs = std.io.fixedBufferStream(props_bytes); 47 var props_fbs = std.io.fixedBufferStream(props_bytes);
50 var props_decomp = decompressor(.raw, props_fbs.reader()); 48 var props_reader = props_fbs.reader();
51 var props_reader = props_decomp.reader();
52 49
53 const stage_1_len: u16 = try props_reader.readInt(u16, endian); 50 const stage_1_len: u16 = try props_reader.readInt(u16, endian);
54 props.props_s1 = try allocator.alloc(u16, stage_1_len); 51 props.props_s1 = try allocator.alloc(u16, stage_1_len);
@@ -63,8 +60,7 @@ inline fn setupInner(props: *Properties, allocator: Allocator) !void {
63 // Process DerivedNumericType.txt 60 // Process DerivedNumericType.txt
64 const num_bytes = @embedFile("numeric"); 61 const num_bytes = @embedFile("numeric");
65 var num_fbs = std.io.fixedBufferStream(num_bytes); 62 var num_fbs = std.io.fixedBufferStream(num_bytes);
66 var num_decomp = decompressor(.raw, num_fbs.reader()); 63 var num_reader = num_fbs.reader();
67 var num_reader = num_decomp.reader();
68 64
69 const num_stage_1_len: u16 = try num_reader.readInt(u16, endian); 65 const num_stage_1_len: u16 = try num_reader.readInt(u16, endian);
70 props.num_s1 = try allocator.alloc(u16, num_stage_1_len); 66 props.num_s1 = try allocator.alloc(u16, num_stage_1_len);
diff --git a/src/Scripts.zig b/src/Scripts.zig
index 3bc90bc..719b01f 100644
--- a/src/Scripts.zig
+++ b/src/Scripts.zig
@@ -196,11 +196,9 @@ pub fn setup(scripts: *Scripts, allocator: Allocator) Allocator.Error!void {
196} 196}
197 197
198inline fn setupInner(scripts: *Scripts, allocator: mem.Allocator) !void { 198inline fn setupInner(scripts: *Scripts, allocator: mem.Allocator) !void {
199 const decompressor = compress.flate.inflate.decompressor;
200 const in_bytes = @embedFile("scripts"); 199 const in_bytes = @embedFile("scripts");
201 var in_fbs = std.io.fixedBufferStream(in_bytes); 200 var in_fbs = std.io.fixedBufferStream(in_bytes);
202 var in_decomp = decompressor(.raw, in_fbs.reader()); 201 var reader = in_fbs.reader();
203 var reader = in_decomp.reader();
204 202
205 const endian = builtin.cpu.arch.endian(); 203 const endian = builtin.cpu.arch.endian();
206 204
@@ -250,7 +248,6 @@ test "Allocation failure" {
250 248
251const std = @import("std"); 249const std = @import("std");
252const builtin = @import("builtin"); 250const builtin = @import("builtin");
253const compress = std.compress;
254const mem = std.mem; 251const mem = std.mem;
255const Allocator = mem.Allocator; 252const Allocator = mem.Allocator;
256const testing = std.testing; 253const testing = std.testing;
diff --git a/src/Words.zig b/src/Words.zig
index 617c34d..ce3203f 100644
--- a/src/Words.zig
+++ b/src/Words.zig
@@ -605,11 +605,9 @@ const SneakIterator = struct {
605}; 605};
606 606
607inline fn setupImpl(wb: *Words, allocator: Allocator) !void { 607inline fn setupImpl(wb: *Words, allocator: Allocator) !void {
608 const decompressor = compress.flate.inflate.decompressor;
609 const in_bytes = @embedFile("wbp"); 608 const in_bytes = @embedFile("wbp");
610 var in_fbs = std.io.fixedBufferStream(in_bytes); 609 var in_fbs = std.io.fixedBufferStream(in_bytes);
611 var in_decomp = decompressor(.raw, in_fbs.reader()); 610 var reader = in_fbs.reader();
612 var reader = in_decomp.reader();
613 611
614 const endian = builtin.cpu.arch.endian(); 612 const endian = builtin.cpu.arch.endian();
615 613
diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig
index ae177a9..e2a5a96 100644
--- a/src/unicode_tests.zig
+++ b/src/unicode_tests.zig
@@ -3,35 +3,30 @@ const dbg_print = false;
3test "Unicode normalization tests" { 3test "Unicode normalization tests" {
4 var arena = heap.ArenaAllocator.init(testing.allocator); 4 var arena = heap.ArenaAllocator.init(testing.allocator);
5 defer arena.deinit(); 5 defer arena.deinit();
6 var allocator = arena.allocator(); 6 const allocator = arena.allocator();
7 7
8 const n = try Normalize.init(allocator); 8 const n = try Normalize.init(allocator);
9 defer n.deinit(allocator); 9 defer n.deinit(allocator);
10 10
11 var file = try fs.cwd().openFile("data/unicode/NormalizationTest.txt", .{}); 11 var reader = std.io.Reader.fixed(@embedFile("NormalizationTest.txt"));
12 defer file.close();
13 var buf_reader = io.bufferedReader(file.reader());
14 var input_stream = buf_reader.reader();
15
16 var buf: [4096]u8 = undefined;
17 var cp_buf: [4]u8 = undefined; 12 var cp_buf: [4]u8 = undefined;
18 13
19 var line_iter: IterRead = .{ .read = &input_stream }; 14 var line_iter: IterRead = .{ .read = &reader };
20 15
21 while (try line_iter.next(&buf)) |line| { 16 while (line_iter.next()) |line| {
22 // Iterate over fields. 17 // Iterate over fields.
23 var fields = mem.splitScalar(u8, line, ';'); 18 var fields = mem.splitScalar(u8, line, ';');
24 var field_index: usize = 0; 19 var field_index: usize = 0;
25 var input: []u8 = undefined; 20 var input: []u8 = undefined;
26 defer allocator.free(input); 21 if (dbg_print) std.debug.print("Line: {s}\n", .{line});
27
28 while (fields.next()) |field| : (field_index += 1) { 22 while (fields.next()) |field| : (field_index += 1) {
29 if (field_index == 0) { 23 if (field_index == 0) {
30 var i_buf = std.ArrayList(u8).init(allocator); 24 var i_buf = std.array_list.Managed(u8).init(allocator);
31 defer i_buf.deinit(); 25 defer i_buf.deinit();
32 26
33 var i_fields = mem.splitScalar(u8, field, ' '); 27 var i_fields = mem.splitScalar(u8, field, ' ');
34 while (i_fields.next()) |s| { 28 while (i_fields.next()) |s| {
29 if (dbg_print) std.debug.print("Debug: {s}\n", .{s});
35 const icp = try fmt.parseInt(u21, s, 16); 30 const icp = try fmt.parseInt(u21, s, 16);
36 const len = try unicode.utf8Encode(icp, &cp_buf); 31 const len = try unicode.utf8Encode(icp, &cp_buf);
37 try i_buf.appendSlice(cp_buf[0..len]); 32 try i_buf.appendSlice(cp_buf[0..len]);
@@ -41,7 +36,7 @@ test "Unicode normalization tests" {
41 } else if (field_index == 1) { 36 } else if (field_index == 1) {
42 if (dbg_print) debug.print("\n*** {s} ***\n", .{line}); 37 if (dbg_print) debug.print("\n*** {s} ***\n", .{line});
43 // NFC, time to test. 38 // NFC, time to test.
44 var w_buf = std.ArrayList(u8).init(allocator); 39 var w_buf = std.array_list.Managed(u8).init(allocator);
45 defer w_buf.deinit(); 40 defer w_buf.deinit();
46 41
47 var w_fields = mem.splitScalar(u8, field, ' '); 42 var w_fields = mem.splitScalar(u8, field, ' ');
@@ -58,7 +53,7 @@ test "Unicode normalization tests" {
58 try testing.expectEqualStrings(want, got.slice); 53 try testing.expectEqualStrings(want, got.slice);
59 } else if (field_index == 2) { 54 } else if (field_index == 2) {
60 // NFD, time to test. 55 // NFD, time to test.
61 var w_buf = std.ArrayList(u8).init(allocator); 56 var w_buf = std.array_list.Managed(u8).init(allocator);
62 defer w_buf.deinit(); 57 defer w_buf.deinit();
63 58
64 var w_fields = mem.splitScalar(u8, field, ' '); 59 var w_fields = mem.splitScalar(u8, field, ' ');
@@ -75,7 +70,7 @@ test "Unicode normalization tests" {
75 try testing.expectEqualStrings(want, got.slice); 70 try testing.expectEqualStrings(want, got.slice);
76 } else if (field_index == 3) { 71 } else if (field_index == 3) {
77 // NFKC, time to test. 72 // NFKC, time to test.
78 var w_buf = std.ArrayList(u8).init(allocator); 73 var w_buf = std.array_list.Managed(u8).init(allocator);
79 defer w_buf.deinit(); 74 defer w_buf.deinit();
80 75
81 var w_fields = mem.splitScalar(u8, field, ' '); 76 var w_fields = mem.splitScalar(u8, field, ' ');
@@ -92,7 +87,7 @@ test "Unicode normalization tests" {
92 try testing.expectEqualStrings(want, got.slice); 87 try testing.expectEqualStrings(want, got.slice);
93 } else if (field_index == 4) { 88 } else if (field_index == 4) {
94 // NFKD, time to test. 89 // NFKD, time to test.
95 var w_buf = std.ArrayList(u8).init(allocator); 90 var w_buf = std.array_list.Managed(u8).init(allocator);
96 defer w_buf.deinit(); 91 defer w_buf.deinit();
97 92
98 var w_fields = mem.splitScalar(u8, field, ' '); 93 var w_fields = mem.splitScalar(u8, field, ' ');
@@ -111,33 +106,34 @@ test "Unicode normalization tests" {
111 continue; 106 continue;
112 } 107 }
113 } 108 }
109 } else |err| switch (err) {
110 error.EndOfStream => {},
111 else => {
112 return err;
113 },
114 } 114 }
115} 115}
116 116
117test "Segmentation GraphemeIterator" { 117test "Segmentation GraphemeIterator" {
118 const allocator = std.testing.allocator; 118 const allocator = std.testing.allocator;
119 var file = try std.fs.cwd().openFile("data/unicode/auxiliary/GraphemeBreakTest.txt", .{});
120 defer file.close();
121 var buf_reader = std.io.bufferedReader(file.reader());
122 var input_stream = buf_reader.reader();
123 119
120 var reader = std.io.Reader.fixed(@embedFile("GraphemeBreakTest.txt"));
124 const graph = try Graphemes.init(allocator); 121 const graph = try Graphemes.init(allocator);
125 defer graph.deinit(allocator); 122 defer graph.deinit(allocator);
126 123
127 var buf: [4096]u8 = undefined; 124 var line_iter: IterRead = .{ .read = &reader };
128 var line_iter: IterRead = .{ .read = &input_stream };
129 125
130 while (try line_iter.next(&buf)) |raw| { 126 while (line_iter.next()) |raw| {
131 // Clean up. 127 // Clean up.
132 var line = std.mem.trimLeft(u8, raw, "÷ "); 128 var line = std.mem.trimLeft(u8, raw, "÷ ");
133 if (std.mem.indexOf(u8, line, " ÷\t")) |final| { 129 if (std.mem.indexOf(u8, line, " ÷\t")) |final| {
134 line = line[0..final]; 130 line = line[0..final];
135 } 131 }
136 // Iterate over fields. 132 // Iterate over fields.
137 var want = std.ArrayList(Grapheme).init(allocator); 133 var want = std.array_list.Managed(Grapheme).init(allocator);
138 defer want.deinit(); 134 defer want.deinit();
139 135
140 var all_bytes = std.ArrayList(u8).init(allocator); 136 var all_bytes = std.array_list.Managed(u8).init(allocator);
141 defer all_bytes.deinit(); 137 defer all_bytes.deinit();
142 138
143 var graphemes = std.mem.splitSequence(u8, line, " ÷ "); 139 var graphemes = std.mem.splitSequence(u8, line, " ÷ ");
@@ -250,33 +246,33 @@ test "Segmentation GraphemeIterator" {
250 } 246 }
251 } 247 }
252 } 248 }
249 } else |err| switch (err) {
250 error.EndOfStream => {},
251 else => {
252 return err;
253 },
253 } 254 }
254} 255}
255 256
256test "Segmentation Word Iterator" { 257test "Segmentation Word Iterator" {
257 const allocator = std.testing.allocator; 258 const allocator = std.testing.allocator;
258 var file = try std.fs.cwd().openFile("data/unicode/auxiliary/WordBreakTest.txt", .{}); 259 var reader = std.io.Reader.fixed(@embedFile("WordBreakTest.txt"));
259 defer file.close();
260 var buf_reader = std.io.bufferedReader(file.reader());
261 var input_stream = buf_reader.reader();
262
263 const wb = try Words.init(allocator); 260 const wb = try Words.init(allocator);
264 defer wb.deinit(allocator); 261 defer wb.deinit(allocator);
265 262
266 var buf: [4096]u8 = undefined; 263 var line_iter: IterRead = .{ .read = &reader };
267 var line_iter: IterRead = .{ .read = &input_stream };
268 264
269 while (try line_iter.next(&buf)) |raw| { 265 while (line_iter.next()) |raw| {
270 // Clean up. 266 // Clean up.
271 var line = std.mem.trimLeft(u8, raw, "÷ "); 267 var line = std.mem.trimLeft(u8, raw, "÷ ");
272 if (std.mem.indexOf(u8, line, " ÷\t")) |final| { 268 if (std.mem.indexOf(u8, line, " ÷\t")) |final| {
273 line = line[0..final]; 269 line = line[0..final];
274 } 270 }
275 // Iterate over fields. 271 // Iterate over fields.
276 var want = std.ArrayList(Word).init(allocator); 272 var want = std.array_list.Managed(Word).init(allocator);
277 defer want.deinit(); 273 defer want.deinit();
278 274
279 var all_bytes = std.ArrayList(u8).init(allocator); 275 var all_bytes = std.array_list.Managed(u8).init(allocator);
280 defer all_bytes.deinit(); 276 defer all_bytes.deinit();
281 277
282 var words = std.mem.splitSequence(u8, line, " ÷ "); 278 var words = std.mem.splitSequence(u8, line, " ÷ ");
@@ -439,26 +435,27 @@ test "Segmentation Word Iterator" {
439 if (idx == 0) break; 435 if (idx == 0) break;
440 } 436 }
441 } 437 }
438 } else |err| switch (err) {
439 error.EndOfStream => {},
440 else => {
441 return err;
442 },
442 } 443 }
443} 444}
444 445
445const IterRead = struct { 446const IterRead = struct {
446 read: *Reader, 447 read: *io.Reader,
447 line: usize = 0, 448 line: usize = 0,
448 449
449 pub fn next(iter: *IterRead, buf: []u8) !?[]const u8 { 450 pub fn next(iter: *IterRead) anyerror![]const u8 {
450 defer iter.line += 1; 451 iter.line += 1;
451 const maybe_line = try iter.read.readUntilDelimiterOrEof(buf, '#'); 452 const took = try iter.read.takeDelimiterInclusive('\n');
452 if (maybe_line) |this_line| { 453 const this_line = std.mem.trimRight(u8, took, "\n");
453 try iter.read.skipUntilDelimiterOrEof('\n'); 454 if (this_line.len == 0 or this_line[0] == '@' or this_line[0] == '#') {
454 if (this_line.len == 0 or this_line[0] == '@') { 455 // comment, next line
455 // comment, next line 456 return iter.next();
456 return iter.next(buf);
457 } else {
458 return this_line;
459 }
460 } else { 457 } else {
461 return null; 458 return this_line;
462 } 459 }
463 } 460 }
464}; 461};
@@ -467,7 +464,6 @@ const std = @import("std");
467const fmt = std.fmt; 464const fmt = std.fmt;
468const fs = std.fs; 465const fs = std.fs;
469const io = std.io; 466const io = std.io;
470const Reader = io.BufferedReader(4096, fs.File.Reader).Reader;
471const heap = std.heap; 467const heap = std.heap;
472const mem = std.mem; 468const mem = std.mem;
473const debug = std.debug; 469const debug = std.debug;