summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Lich, 2026-01-13 01:10:17 +0300
committer: Lich, 2026-01-13 01:10:17 +0300
commit: dfece51720a17fddd0520ce8bda1a3c05d110949 (patch)
tree: d841e9321c1135ba5644b444ba7bb508ec4df025 /src
parent: Moved part of the `strWidth` into its own `graphemeClusterWidth` function (diff)
parent: Merge pull request 'Use width 2 when skin tone modifier detected' (#96) from ... (diff)
download: zg-dfece51720a17fddd0520ce8bda1a3c05d110949.tar.gz
          zg-dfece51720a17fddd0520ce8bda1a3c05d110949.tar.xz
          zg-dfece51720a17fddd0520ce8bda1a3c05d110949.zip
Merge branch 'master' of https://codeberg.org/atman/zg into graphemeClusterWidth
Diffstat (limited to 'src')
-rw-r--r--  src/CanonData.zig          4
-rw-r--r--  src/CaseFolding.zig        6
-rw-r--r--  src/CombiningData.zig      5
-rw-r--r--  src/CompatData.zig         5
-rw-r--r--  src/DisplayWidth.zig      31
-rw-r--r--  src/GeneralCategories.zig  4
-rw-r--r--  src/Graphemes.zig          4
-rw-r--r--  src/HangulData.zig         5
-rw-r--r--  src/LetterCasing.zig      14
-rw-r--r--  src/NormPropsData.zig      5
-rw-r--r--  src/Normalize.zig         10
-rw-r--r--  src/Properties.zig        10
-rw-r--r--  src/Scripts.zig            5
-rw-r--r--  src/Words.zig              4
-rw-r--r--  src/unicode_tests.zig     94
15 files changed, 92 insertions, 114 deletions
diff --git a/src/CanonData.zig b/src/CanonData.zig
index 5d2332a..cf9dc8a 100644
--- a/src/CanonData.zig
+++ b/src/CanonData.zig
@@ -7,11 +7,9 @@ cps: []u21 = undefined,
7const CanonData = @This(); 7const CanonData = @This();
8 8
9pub fn init(allocator: mem.Allocator) !CanonData { 9pub fn init(allocator: mem.Allocator) !CanonData {
10 const decompressor = compress.flate.inflate.decompressor;
11 const in_bytes = @embedFile("canon"); 10 const in_bytes = @embedFile("canon");
12 var in_fbs = std.io.fixedBufferStream(in_bytes); 11 var in_fbs = std.io.fixedBufferStream(in_bytes);
13 var in_decomp = decompressor(.raw, in_fbs.reader()); 12 var reader = in_fbs.reader();
14 var reader = in_decomp.reader();
15 13
16 const endian = builtin.cpu.arch.endian(); 14 const endian = builtin.cpu.arch.endian();
17 var cdata = CanonData{ 15 var cdata = CanonData{
diff --git a/src/CaseFolding.zig b/src/CaseFolding.zig
index ff41b3e..df86b92 100644
--- a/src/CaseFolding.zig
+++ b/src/CaseFolding.zig
@@ -48,11 +48,9 @@ fn setupImpl(casefold: *CaseFolding, allocator: Allocator) Allocator.Error!void
48} 48}
49 49
50inline fn setupImplInner(casefold: *CaseFolding, allocator: Allocator) !void { 50inline fn setupImplInner(casefold: *CaseFolding, allocator: Allocator) !void {
51 const decompressor = compress.flate.inflate.decompressor;
52 const in_bytes = @embedFile("fold"); 51 const in_bytes = @embedFile("fold");
53 var in_fbs = std.io.fixedBufferStream(in_bytes); 52 var in_fbs = std.io.fixedBufferStream(in_bytes);
54 var in_decomp = decompressor(.raw, in_fbs.reader()); 53 var reader = in_fbs.reader();
55 var reader = in_decomp.reader();
56 54
57 const endian = builtin.cpu.arch.endian(); 55 const endian = builtin.cpu.arch.endian();
58 56
@@ -123,7 +121,7 @@ pub fn caseFoldAlloc(
123 allocator: Allocator, 121 allocator: Allocator,
124 cps: []const u21, 122 cps: []const u21,
125) Allocator.Error![]const u21 { 123) Allocator.Error![]const u21 {
126 var cfcps = std.ArrayList(u21).init(allocator); 124 var cfcps = std.array_list.Managed(u21).init(allocator);
127 defer cfcps.deinit(); 125 defer cfcps.deinit();
128 var buf: [3]u21 = undefined; 126 var buf: [3]u21 = undefined;
129 127
diff --git a/src/CombiningData.zig b/src/CombiningData.zig
index fd64a3b..f58e0de 100644
--- a/src/CombiningData.zig
+++ b/src/CombiningData.zig
@@ -6,11 +6,9 @@ s2: []u8 = undefined,
6const CombiningData = @This(); 6const CombiningData = @This();
7 7
8pub fn init(allocator: mem.Allocator) !CombiningData { 8pub fn init(allocator: mem.Allocator) !CombiningData {
9 const decompressor = compress.flate.inflate.decompressor;
10 const in_bytes = @embedFile("ccc"); 9 const in_bytes = @embedFile("ccc");
11 var in_fbs = std.io.fixedBufferStream(in_bytes); 10 var in_fbs = std.io.fixedBufferStream(in_bytes);
12 var in_decomp = decompressor(.raw, in_fbs.reader()); 11 var reader = in_fbs.reader();
13 var reader = in_decomp.reader();
14 12
15 const endian = builtin.cpu.arch.endian(); 13 const endian = builtin.cpu.arch.endian();
16 14
@@ -46,5 +44,4 @@ pub fn isStarter(cbdata: CombiningData, cp: u21) bool {
46 44
47const std = @import("std"); 45const std = @import("std");
48const builtin = @import("builtin"); 46const builtin = @import("builtin");
49const compress = std.compress;
50const mem = std.mem; 47const mem = std.mem;
diff --git a/src/CompatData.zig b/src/CompatData.zig
index 794abca..40ecd12 100644
--- a/src/CompatData.zig
+++ b/src/CompatData.zig
@@ -6,11 +6,9 @@ cps: []u21 = undefined,
6const CompatData = @This(); 6const CompatData = @This();
7 7
8pub fn init(allocator: mem.Allocator) !CompatData { 8pub fn init(allocator: mem.Allocator) !CompatData {
9 const decompressor = compress.flate.inflate.decompressor;
10 const in_bytes = @embedFile("compat"); 9 const in_bytes = @embedFile("compat");
11 var in_fbs = std.io.fixedBufferStream(in_bytes); 10 var in_fbs = std.io.fixedBufferStream(in_bytes);
12 var in_decomp = decompressor(.raw, in_fbs.reader()); 11 var reader = in_fbs.reader();
13 var reader = in_decomp.reader();
14 12
15 const endian = builtin.cpu.arch.endian(); 13 const endian = builtin.cpu.arch.endian();
16 var cpdata = CompatData{ 14 var cpdata = CompatData{
@@ -55,6 +53,5 @@ pub fn toNfkd(cpdata: *const CompatData, cp: u21) []u21 {
55 53
56const std = @import("std"); 54const std = @import("std");
57const builtin = @import("builtin"); 55const builtin = @import("builtin");
58const compress = std.compress;
59const mem = std.mem; 56const mem = std.mem;
60const magic = @import("magic"); 57const magic = @import("magic");
diff --git a/src/DisplayWidth.zig b/src/DisplayWidth.zig
index 629087b..dee7ebd 100644
--- a/src/DisplayWidth.zig
+++ b/src/DisplayWidth.zig
@@ -39,11 +39,9 @@ pub fn setupWithGraphemes(dw: *DisplayWidth, allocator: Allocator, graphemes: Gr
39 39
40// Sets up the DisplayWidthData, leaving the GraphemeData undefined. 40// Sets up the DisplayWidthData, leaving the GraphemeData undefined.
41pub fn setup(dw: *DisplayWidth, allocator: Allocator) Allocator.Error!void { 41pub fn setup(dw: *DisplayWidth, allocator: Allocator) Allocator.Error!void {
42 const decompressor = compress.flate.inflate.decompressor;
43 const in_bytes = @embedFile("dwp"); 42 const in_bytes = @embedFile("dwp");
44 var in_fbs = std.io.fixedBufferStream(in_bytes); 43 var in_fbs = std.io.fixedBufferStream(in_bytes);
45 var in_decomp = decompressor(.raw, in_fbs.reader()); 44 var reader = in_fbs.reader();
46 var reader = in_decomp.reader();
47 45
48 const endian = builtin.cpu.arch.endian(); 46 const endian = builtin.cpu.arch.endian();
49 47
@@ -118,6 +116,8 @@ pub fn graphemeClusterWidth(dw: DisplayWidth, gc: []const u8) isize {
118 // emoji text sequence. 116 // emoji text sequence.
119 if (ncp.code == 0xFE0E) w = 1; 117 if (ncp.code == 0xFE0E) w = 1;
120 if (ncp.code == 0xFE0F) w = 2; 118 if (ncp.code == 0xFE0F) w = 2;
119 // Skin tones
120 if (0x1F3FB <= ncp.code and ncp.code <= 0x1F3FF) w = 2;
121 } 121 }
122 122
123 // Only adding width of first non-zero-width code point. 123 // Only adding width of first non-zero-width code point.
@@ -207,6 +207,9 @@ test "strWidth" {
207 try testing.expectEqual(@as(usize, 9), dw.strWidth("Ẓ̌á̲l͔̝̞̄̑͌g̖̘̘̔̔͢͞͝o̪̔T̢̙̫̈̍͞e̬͈͕͌̏͑x̺̍ṭ̓̓ͅ")); 207 try testing.expectEqual(@as(usize, 9), dw.strWidth("Ẓ̌á̲l͔̝̞̄̑͌g̖̘̘̔̔͢͞͝o̪̔T̢̙̫̈̍͞e̬͈͕͌̏͑x̺̍ṭ̓̓ͅ"));
208 try testing.expectEqual(@as(usize, 17), dw.strWidth("슬라바 우크라이나")); 208 try testing.expectEqual(@as(usize, 17), dw.strWidth("슬라바 우크라이나"));
209 try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{378}")); 209 try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{378}"));
210
211 // https://codeberg.org/atman/zg/issues/82
212 try testing.expectEqual(@as(usize, 12), dw.strWidth("✍️✍🏻✍🏼✍🏽✍🏾✍🏿"));
210} 213}
211 214
212/// centers `str` in a new string of width `total_width` (in display cells) using `pad` as padding. 215/// centers `str` in a new string of width `total_width` (in display cells) using `pad` as padding.
@@ -404,7 +407,7 @@ pub fn wrap(
404 columns: usize, 407 columns: usize,
405 threshold: usize, 408 threshold: usize,
406) ![]u8 { 409) ![]u8 {
407 var result = ArrayList(u8).init(allocator); 410 var result = std.array_list.Managed(u8).init(allocator);
408 defer result.deinit(); 411 defer result.deinit();
409 412
410 var line_iter = mem.tokenizeAny(u8, str, "\r\n"); 413 var line_iter = mem.tokenizeAny(u8, str, "\r\n");
@@ -426,8 +429,10 @@ pub fn wrap(
426 } 429 }
427 430
428 // Remove trailing space and newline. 431 // Remove trailing space and newline.
429 _ = result.pop(); 432 if (result.items[result.items.len - 1] == '\n')
430 _ = result.pop(); 433 _ = result.pop();
434 if (result.items[result.items.len - 1] == ' ')
435 _ = result.pop();
431 436
432 return try result.toOwnedSlice(); 437 return try result.toOwnedSlice();
433} 438}
@@ -444,6 +449,18 @@ test "wrap" {
444 try testing.expectEqualStrings(want, got); 449 try testing.expectEqualStrings(want, got);
445} 450}
446 451
452test "zg/74" {
453 var debug_alloc = std.heap.DebugAllocator(.{}).init;
454 const allocator = debug_alloc.allocator();
455 defer _ = debug_alloc.deinit();
456 const dw = try DisplayWidth.init(allocator);
457 defer dw.deinit(allocator);
458 const wrapped = try dw.wrap(allocator, "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam pellentesque pulvinar felis, sit amet commodo ligula feugiat sed. Sed quis malesuada elit, nec eleifend lectus. Sed tincidunt finibus aliquet. Praesent consectetur nibh libero, tempus imperdiet lorem congue eget.", 16, 1);
459 defer allocator.free(wrapped);
460 const expected_wrap = "Lorem ipsum dolor \nsit amet, consectetur \nadipiscing elit. \nNullam pellentesque \npulvinar felis, \nsit amet commodo \nligula feugiat \nsed. Sed quis malesuada \nelit, nec eleifend \nlectus. Sed tincidunt \nfinibus aliquet. \nPraesent consectetur \nnibh libero, tempus \nimperdiet lorem \ncongue eget.";
461 try std.testing.expectEqualStrings(expected_wrap, wrapped);
462}
463
447fn testAllocation(allocator: Allocator) !void { 464fn testAllocation(allocator: Allocator) !void {
448 { 465 {
449 var dw = try DisplayWidth.init(allocator); 466 var dw = try DisplayWidth.init(allocator);
@@ -464,8 +481,6 @@ test "allocation test" {
464const std = @import("std"); 481const std = @import("std");
465const builtin = @import("builtin"); 482const builtin = @import("builtin");
466const options = @import("options"); 483const options = @import("options");
467const ArrayList = std.ArrayList;
468const compress = std.compress;
469const mem = std.mem; 484const mem = std.mem;
470const Allocator = mem.Allocator; 485const Allocator = mem.Allocator;
471const simd = std.simd; 486const simd = std.simd;
diff --git a/src/GeneralCategories.zig b/src/GeneralCategories.zig
index 8c1b6a3..eee7e56 100644
--- a/src/GeneralCategories.zig
+++ b/src/GeneralCategories.zig
@@ -47,11 +47,9 @@ pub fn init(allocator: Allocator) Allocator.Error!GeneralCategories {
47} 47}
48 48
49pub fn setup(gencat: *GeneralCategories, allocator: Allocator) Allocator.Error!void { 49pub fn setup(gencat: *GeneralCategories, allocator: Allocator) Allocator.Error!void {
50 const decompressor = compress.flate.inflate.decompressor;
51 const in_bytes = @embedFile("gencat"); 50 const in_bytes = @embedFile("gencat");
52 var in_fbs = std.io.fixedBufferStream(in_bytes); 51 var in_fbs = std.io.fixedBufferStream(in_bytes);
53 var in_decomp = decompressor(.raw, in_fbs.reader()); 52 var reader = in_fbs.reader();
54 var reader = in_decomp.reader();
55 53
56 const endian = builtin.cpu.arch.endian(); 54 const endian = builtin.cpu.arch.endian();
57 55
diff --git a/src/Graphemes.zig b/src/Graphemes.zig
index f1c56ed..81d874c 100644
--- a/src/Graphemes.zig
+++ b/src/Graphemes.zig
@@ -16,11 +16,9 @@ pub fn init(allocator: Allocator) Allocator.Error!Graphemes {
16} 16}
17 17
18pub fn setup(graphemes: *Graphemes, allocator: Allocator) Allocator.Error!void { 18pub fn setup(graphemes: *Graphemes, allocator: Allocator) Allocator.Error!void {
19 const decompressor = compress.flate.inflate.decompressor;
20 const in_bytes = @embedFile("gbp"); 19 const in_bytes = @embedFile("gbp");
21 var in_fbs = std.io.fixedBufferStream(in_bytes); 20 var in_fbs = std.io.fixedBufferStream(in_bytes);
22 var in_decomp = decompressor(.raw, in_fbs.reader()); 21 var reader = in_fbs.reader();
23 var reader = in_decomp.reader();
24 22
25 const endian = builtin.cpu.arch.endian(); 23 const endian = builtin.cpu.arch.endian();
26 24
diff --git a/src/HangulData.zig b/src/HangulData.zig
index 8c5f3ad..cae8b97 100644
--- a/src/HangulData.zig
+++ b/src/HangulData.zig
@@ -15,11 +15,9 @@ s2: []u3 = undefined,
15const Hangul = @This(); 15const Hangul = @This();
16 16
17pub fn init(allocator: mem.Allocator) !Hangul { 17pub fn init(allocator: mem.Allocator) !Hangul {
18 const decompressor = compress.flate.inflate.decompressor;
19 const in_bytes = @embedFile("hangul"); 18 const in_bytes = @embedFile("hangul");
20 var in_fbs = std.io.fixedBufferStream(in_bytes); 19 var in_fbs = std.io.fixedBufferStream(in_bytes);
21 var in_decomp = decompressor(.raw, in_fbs.reader()); 20 var reader = in_fbs.reader();
22 var reader = in_decomp.reader();
23 21
24 const endian = builtin.cpu.arch.endian(); 22 const endian = builtin.cpu.arch.endian();
25 var hangul = Hangul{}; 23 var hangul = Hangul{};
@@ -49,6 +47,5 @@ pub fn syllable(hangul: *const Hangul, cp: u21) Syllable {
49 47
50const std = @import("std"); 48const std = @import("std");
51const builtin = @import("builtin"); 49const builtin = @import("builtin");
52const compress = std.compress;
53const mem = std.mem; 50const mem = std.mem;
54const testing = std.testing; 51const testing = std.testing;
diff --git a/src/LetterCasing.zig b/src/LetterCasing.zig
index 11a3e96..33096fc 100644
--- a/src/LetterCasing.zig
+++ b/src/LetterCasing.zig
@@ -22,7 +22,6 @@ pub fn setup(case: *LetterCasing, allocator: Allocator) Allocator.Error!void {
22} 22}
23 23
24inline fn setupInner(self: *LetterCasing, allocator: mem.Allocator) !void { 24inline fn setupInner(self: *LetterCasing, allocator: mem.Allocator) !void {
25 const decompressor = compress.flate.inflate.decompressor;
26 const endian = builtin.cpu.arch.endian(); 25 const endian = builtin.cpu.arch.endian();
27 26
28 self.case_map = try allocator.alloc([2]u21, 0x110000); 27 self.case_map = try allocator.alloc([2]u21, 0x110000);
@@ -36,8 +35,7 @@ inline fn setupInner(self: *LetterCasing, allocator: mem.Allocator) !void {
36 // Uppercase 35 // Uppercase
37 const upper_bytes = @embedFile("upper"); 36 const upper_bytes = @embedFile("upper");
38 var upper_fbs = std.io.fixedBufferStream(upper_bytes); 37 var upper_fbs = std.io.fixedBufferStream(upper_bytes);
39 var upper_decomp = decompressor(.raw, upper_fbs.reader()); 38 var upper_reader = upper_fbs.reader();
40 var upper_reader = upper_decomp.reader();
41 39
42 while (true) { 40 while (true) {
43 const cp = try upper_reader.readInt(i24, endian); 41 const cp = try upper_reader.readInt(i24, endian);
@@ -49,8 +47,7 @@ inline fn setupInner(self: *LetterCasing, allocator: mem.Allocator) !void {
49 // Lowercase 47 // Lowercase
50 const lower_bytes = @embedFile("lower"); 48 const lower_bytes = @embedFile("lower");
51 var lower_fbs = std.io.fixedBufferStream(lower_bytes); 49 var lower_fbs = std.io.fixedBufferStream(lower_bytes);
52 var lower_decomp = decompressor(.raw, lower_fbs.reader()); 50 var lower_reader = lower_fbs.reader();
53 var lower_reader = lower_decomp.reader();
54 51
55 while (true) { 52 while (true) {
56 const cp = try lower_reader.readInt(i24, endian); 53 const cp = try lower_reader.readInt(i24, endian);
@@ -62,8 +59,7 @@ inline fn setupInner(self: *LetterCasing, allocator: mem.Allocator) !void {
62 // Case properties 59 // Case properties
63 const cp_bytes = @embedFile("case_prop"); 60 const cp_bytes = @embedFile("case_prop");
64 var cp_fbs = std.io.fixedBufferStream(cp_bytes); 61 var cp_fbs = std.io.fixedBufferStream(cp_bytes);
65 var cp_decomp = decompressor(.raw, cp_fbs.reader()); 62 var cp_reader = cp_fbs.reader();
66 var cp_reader = cp_decomp.reader();
67 63
68 const stage_1_len: u16 = try cp_reader.readInt(u16, endian); 64 const stage_1_len: u16 = try cp_reader.readInt(u16, endian);
69 self.prop_s1 = try allocator.alloc(u16, stage_1_len); 65 self.prop_s1 = try allocator.alloc(u16, stage_1_len);
@@ -122,7 +118,7 @@ pub fn toUpperStr(
122 allocator: mem.Allocator, 118 allocator: mem.Allocator,
123 str: []const u8, 119 str: []const u8,
124) ![]u8 { 120) ![]u8 {
125 var bytes = std.ArrayList(u8).init(allocator); 121 var bytes = std.array_list.Managed(u8).init(allocator);
126 defer bytes.deinit(); 122 defer bytes.deinit();
127 123
128 var iter = CodePointIterator{ .bytes = str }; 124 var iter = CodePointIterator{ .bytes = str };
@@ -180,7 +176,7 @@ pub fn toLowerStr(
180 allocator: mem.Allocator, 176 allocator: mem.Allocator,
181 str: []const u8, 177 str: []const u8,
182) ![]u8 { 178) ![]u8 {
183 var bytes = std.ArrayList(u8).init(allocator); 179 var bytes = std.array_list.Managed(u8).init(allocator);
184 defer bytes.deinit(); 180 defer bytes.deinit();
185 181
186 var iter = CodePointIterator{ .bytes = str }; 182 var iter = CodePointIterator{ .bytes = str };
diff --git a/src/NormPropsData.zig b/src/NormPropsData.zig
index ca69569..7b53542 100644
--- a/src/NormPropsData.zig
+++ b/src/NormPropsData.zig
@@ -6,11 +6,9 @@ s2: []u4 = undefined,
6const NormProps = @This(); 6const NormProps = @This();
7 7
8pub fn init(allocator: mem.Allocator) !NormProps { 8pub fn init(allocator: mem.Allocator) !NormProps {
9 const decompressor = compress.flate.inflate.decompressor;
10 const in_bytes = @embedFile("normp"); 9 const in_bytes = @embedFile("normp");
11 var in_fbs = std.io.fixedBufferStream(in_bytes); 10 var in_fbs = std.io.fixedBufferStream(in_bytes);
12 var in_decomp = decompressor(.raw, in_fbs.reader()); 11 var reader = in_fbs.reader();
13 var reader = in_decomp.reader();
14 12
15 const endian = builtin.cpu.arch.endian(); 13 const endian = builtin.cpu.arch.endian();
16 var norms = NormProps{}; 14 var norms = NormProps{};
@@ -50,6 +48,5 @@ pub fn isFcx(norms: *const NormProps, cp: u21) bool {
50 48
51const std = @import("std"); 49const std = @import("std");
52const builtin = @import("builtin"); 50const builtin = @import("builtin");
53const compress = std.compress;
54const mem = std.mem; 51const mem = std.mem;
55const testing = std.testing; 52const testing = std.testing;
diff --git a/src/Normalize.zig b/src/Normalize.zig
index 989ec29..4a1bae8 100644
--- a/src/Normalize.zig
+++ b/src/Normalize.zig
@@ -305,7 +305,7 @@ pub fn nfkd(self: Normalize, allocator: Allocator, str: []const u8) Allocator.Er
305} 305}
306 306
307pub fn nfxdCodePoints(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allocator.Error![]u21 { 307pub fn nfxdCodePoints(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allocator.Error![]u21 {
308 var dcp_list = std.ArrayList(u21).init(allocator); 308 var dcp_list = std.array_list.Managed(u21).init(allocator);
309 defer dcp_list.deinit(); 309 defer dcp_list.deinit();
310 310
311 var cp_iter = CodePointIterator{ .bytes = str }; 311 var cp_iter = CodePointIterator{ .bytes = str };
@@ -332,7 +332,7 @@ fn nfxd(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allo
332 const dcps = try self.nfxdCodePoints(allocator, str, form); 332 const dcps = try self.nfxdCodePoints(allocator, str, form);
333 defer allocator.free(dcps); 333 defer allocator.free(dcps);
334 334
335 var dstr_list = std.ArrayList(u8).init(allocator); 335 var dstr_list = std.array_list.Managed(u8).init(allocator);
336 defer dstr_list.deinit(); 336 defer dstr_list.deinit();
337 var buf: [4]u8 = undefined; 337 var buf: [4]u8 = undefined;
338 338
@@ -393,7 +393,7 @@ pub fn nfdCodePoints(
393 allocator: Allocator, 393 allocator: Allocator,
394 cps: []const u21, 394 cps: []const u21,
395) Allocator.Error![]u21 { 395) Allocator.Error![]u21 {
396 var dcp_list = std.ArrayList(u21).init(allocator); 396 var dcp_list = std.array_list.Managed(u21).init(allocator);
397 defer dcp_list.deinit(); 397 defer dcp_list.deinit();
398 398
399 var dc_buf: [18]u21 = undefined; 399 var dc_buf: [18]u21 = undefined;
@@ -418,7 +418,7 @@ pub fn nfkdCodePoints(
418 allocator: Allocator, 418 allocator: Allocator,
419 cps: []const u21, 419 cps: []const u21,
420) Allocator.Error![]u21 { 420) Allocator.Error![]u21 {
421 var dcp_list = std.ArrayList(u21).init(allocator); 421 var dcp_list = std.array_list.Managed(u21).init(allocator);
422 defer dcp_list.deinit(); 422 defer dcp_list.deinit();
423 423
424 var dc_buf: [18]u21 = undefined; 424 var dc_buf: [18]u21 = undefined;
@@ -560,7 +560,7 @@ fn nfxc(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allo
560 // If we have no deletions. the code point sequence 560 // If we have no deletions. the code point sequence
561 // has been fully composed. 561 // has been fully composed.
562 if (deleted == 0) { 562 if (deleted == 0) {
563 var cstr_list = std.ArrayList(u8).init(allocator); 563 var cstr_list = std.array_list.Managed(u8).init(allocator);
564 defer cstr_list.deinit(); 564 defer cstr_list.deinit();
565 var buf: [4]u8 = undefined; 565 var buf: [4]u8 = undefined;
566 566
diff --git a/src/Properties.zig b/src/Properties.zig
index 73602a0..432d176 100644
--- a/src/Properties.zig
+++ b/src/Properties.zig
@@ -25,14 +25,12 @@ pub fn setup(props: *Properties, allocator: Allocator) Allocator.Error!void {
25} 25}
26 26
27inline fn setupInner(props: *Properties, allocator: Allocator) !void { 27inline fn setupInner(props: *Properties, allocator: Allocator) !void {
28 const decompressor = compress.flate.inflate.decompressor;
29 const endian = builtin.cpu.arch.endian(); 28 const endian = builtin.cpu.arch.endian();
30 29
31 // Process DerivedCoreProperties.txt 30 // Process DerivedCoreProperties.txt
32 const core_bytes = @embedFile("core_props"); 31 const core_bytes = @embedFile("core_props");
33 var core_fbs = std.io.fixedBufferStream(core_bytes); 32 var core_fbs = std.io.fixedBufferStream(core_bytes);
34 var core_decomp = decompressor(.raw, core_fbs.reader()); 33 var core_reader = core_fbs.reader();
35 var core_reader = core_decomp.reader();
36 34
37 const core_stage_1_len: u16 = try core_reader.readInt(u16, endian); 35 const core_stage_1_len: u16 = try core_reader.readInt(u16, endian);
38 props.core_s1 = try allocator.alloc(u16, core_stage_1_len); 36 props.core_s1 = try allocator.alloc(u16, core_stage_1_len);
@@ -47,8 +45,7 @@ inline fn setupInner(props: *Properties, allocator: Allocator) !void {
47 // Process PropList.txt 45 // Process PropList.txt
48 const props_bytes = @embedFile("props"); 46 const props_bytes = @embedFile("props");
49 var props_fbs = std.io.fixedBufferStream(props_bytes); 47 var props_fbs = std.io.fixedBufferStream(props_bytes);
50 var props_decomp = decompressor(.raw, props_fbs.reader()); 48 var props_reader = props_fbs.reader();
51 var props_reader = props_decomp.reader();
52 49
53 const stage_1_len: u16 = try props_reader.readInt(u16, endian); 50 const stage_1_len: u16 = try props_reader.readInt(u16, endian);
54 props.props_s1 = try allocator.alloc(u16, stage_1_len); 51 props.props_s1 = try allocator.alloc(u16, stage_1_len);
@@ -63,8 +60,7 @@ inline fn setupInner(props: *Properties, allocator: Allocator) !void {
63 // Process DerivedNumericType.txt 60 // Process DerivedNumericType.txt
64 const num_bytes = @embedFile("numeric"); 61 const num_bytes = @embedFile("numeric");
65 var num_fbs = std.io.fixedBufferStream(num_bytes); 62 var num_fbs = std.io.fixedBufferStream(num_bytes);
66 var num_decomp = decompressor(.raw, num_fbs.reader()); 63 var num_reader = num_fbs.reader();
67 var num_reader = num_decomp.reader();
68 64
69 const num_stage_1_len: u16 = try num_reader.readInt(u16, endian); 65 const num_stage_1_len: u16 = try num_reader.readInt(u16, endian);
70 props.num_s1 = try allocator.alloc(u16, num_stage_1_len); 66 props.num_s1 = try allocator.alloc(u16, num_stage_1_len);
diff --git a/src/Scripts.zig b/src/Scripts.zig
index 3bc90bc..719b01f 100644
--- a/src/Scripts.zig
+++ b/src/Scripts.zig
@@ -196,11 +196,9 @@ pub fn setup(scripts: *Scripts, allocator: Allocator) Allocator.Error!void {
196} 196}
197 197
198inline fn setupInner(scripts: *Scripts, allocator: mem.Allocator) !void { 198inline fn setupInner(scripts: *Scripts, allocator: mem.Allocator) !void {
199 const decompressor = compress.flate.inflate.decompressor;
200 const in_bytes = @embedFile("scripts"); 199 const in_bytes = @embedFile("scripts");
201 var in_fbs = std.io.fixedBufferStream(in_bytes); 200 var in_fbs = std.io.fixedBufferStream(in_bytes);
202 var in_decomp = decompressor(.raw, in_fbs.reader()); 201 var reader = in_fbs.reader();
203 var reader = in_decomp.reader();
204 202
205 const endian = builtin.cpu.arch.endian(); 203 const endian = builtin.cpu.arch.endian();
206 204
@@ -250,7 +248,6 @@ test "Allocation failure" {
250 248
251const std = @import("std"); 249const std = @import("std");
252const builtin = @import("builtin"); 250const builtin = @import("builtin");
253const compress = std.compress;
254const mem = std.mem; 251const mem = std.mem;
255const Allocator = mem.Allocator; 252const Allocator = mem.Allocator;
256const testing = std.testing; 253const testing = std.testing;
diff --git a/src/Words.zig b/src/Words.zig
index 617c34d..ce3203f 100644
--- a/src/Words.zig
+++ b/src/Words.zig
@@ -605,11 +605,9 @@ const SneakIterator = struct {
605}; 605};
606 606
607inline fn setupImpl(wb: *Words, allocator: Allocator) !void { 607inline fn setupImpl(wb: *Words, allocator: Allocator) !void {
608 const decompressor = compress.flate.inflate.decompressor;
609 const in_bytes = @embedFile("wbp"); 608 const in_bytes = @embedFile("wbp");
610 var in_fbs = std.io.fixedBufferStream(in_bytes); 609 var in_fbs = std.io.fixedBufferStream(in_bytes);
611 var in_decomp = decompressor(.raw, in_fbs.reader()); 610 var reader = in_fbs.reader();
612 var reader = in_decomp.reader();
613 611
614 const endian = builtin.cpu.arch.endian(); 612 const endian = builtin.cpu.arch.endian();
615 613
diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig
index ae177a9..e2a5a96 100644
--- a/src/unicode_tests.zig
+++ b/src/unicode_tests.zig
@@ -3,35 +3,30 @@ const dbg_print = false;
3test "Unicode normalization tests" { 3test "Unicode normalization tests" {
4 var arena = heap.ArenaAllocator.init(testing.allocator); 4 var arena = heap.ArenaAllocator.init(testing.allocator);
5 defer arena.deinit(); 5 defer arena.deinit();
6 var allocator = arena.allocator(); 6 const allocator = arena.allocator();
7 7
8 const n = try Normalize.init(allocator); 8 const n = try Normalize.init(allocator);
9 defer n.deinit(allocator); 9 defer n.deinit(allocator);
10 10
11 var file = try fs.cwd().openFile("data/unicode/NormalizationTest.txt", .{}); 11 var reader = std.io.Reader.fixed(@embedFile("NormalizationTest.txt"));
12 defer file.close();
13 var buf_reader = io.bufferedReader(file.reader());
14 var input_stream = buf_reader.reader();
15
16 var buf: [4096]u8 = undefined;
17 var cp_buf: [4]u8 = undefined; 12 var cp_buf: [4]u8 = undefined;
18 13
19 var line_iter: IterRead = .{ .read = &input_stream }; 14 var line_iter: IterRead = .{ .read = &reader };
20 15
21 while (try line_iter.next(&buf)) |line| { 16 while (line_iter.next()) |line| {
22 // Iterate over fields. 17 // Iterate over fields.
23 var fields = mem.splitScalar(u8, line, ';'); 18 var fields = mem.splitScalar(u8, line, ';');
24 var field_index: usize = 0; 19 var field_index: usize = 0;
25 var input: []u8 = undefined; 20 var input: []u8 = undefined;
26 defer allocator.free(input); 21 if (dbg_print) std.debug.print("Line: {s}\n", .{line});
27
28 while (fields.next()) |field| : (field_index += 1) { 22 while (fields.next()) |field| : (field_index += 1) {
29 if (field_index == 0) { 23 if (field_index == 0) {
30 var i_buf = std.ArrayList(u8).init(allocator); 24 var i_buf = std.array_list.Managed(u8).init(allocator);
31 defer i_buf.deinit(); 25 defer i_buf.deinit();
32 26
33 var i_fields = mem.splitScalar(u8, field, ' '); 27 var i_fields = mem.splitScalar(u8, field, ' ');
34 while (i_fields.next()) |s| { 28 while (i_fields.next()) |s| {
29 if (dbg_print) std.debug.print("Debug: {s}\n", .{s});
35 const icp = try fmt.parseInt(u21, s, 16); 30 const icp = try fmt.parseInt(u21, s, 16);
36 const len = try unicode.utf8Encode(icp, &cp_buf); 31 const len = try unicode.utf8Encode(icp, &cp_buf);
37 try i_buf.appendSlice(cp_buf[0..len]); 32 try i_buf.appendSlice(cp_buf[0..len]);
@@ -41,7 +36,7 @@ test "Unicode normalization tests" {
41 } else if (field_index == 1) { 36 } else if (field_index == 1) {
42 if (dbg_print) debug.print("\n*** {s} ***\n", .{line}); 37 if (dbg_print) debug.print("\n*** {s} ***\n", .{line});
43 // NFC, time to test. 38 // NFC, time to test.
44 var w_buf = std.ArrayList(u8).init(allocator); 39 var w_buf = std.array_list.Managed(u8).init(allocator);
45 defer w_buf.deinit(); 40 defer w_buf.deinit();
46 41
47 var w_fields = mem.splitScalar(u8, field, ' '); 42 var w_fields = mem.splitScalar(u8, field, ' ');
@@ -58,7 +53,7 @@ test "Unicode normalization tests" {
58 try testing.expectEqualStrings(want, got.slice); 53 try testing.expectEqualStrings(want, got.slice);
59 } else if (field_index == 2) { 54 } else if (field_index == 2) {
60 // NFD, time to test. 55 // NFD, time to test.
61 var w_buf = std.ArrayList(u8).init(allocator); 56 var w_buf = std.array_list.Managed(u8).init(allocator);
62 defer w_buf.deinit(); 57 defer w_buf.deinit();
63 58
64 var w_fields = mem.splitScalar(u8, field, ' '); 59 var w_fields = mem.splitScalar(u8, field, ' ');
@@ -75,7 +70,7 @@ test "Unicode normalization tests" {
75 try testing.expectEqualStrings(want, got.slice); 70 try testing.expectEqualStrings(want, got.slice);
76 } else if (field_index == 3) { 71 } else if (field_index == 3) {
77 // NFKC, time to test. 72 // NFKC, time to test.
78 var w_buf = std.ArrayList(u8).init(allocator); 73 var w_buf = std.array_list.Managed(u8).init(allocator);
79 defer w_buf.deinit(); 74 defer w_buf.deinit();
80 75
81 var w_fields = mem.splitScalar(u8, field, ' '); 76 var w_fields = mem.splitScalar(u8, field, ' ');
@@ -92,7 +87,7 @@ test "Unicode normalization tests" {
92 try testing.expectEqualStrings(want, got.slice); 87 try testing.expectEqualStrings(want, got.slice);
93 } else if (field_index == 4) { 88 } else if (field_index == 4) {
94 // NFKD, time to test. 89 // NFKD, time to test.
95 var w_buf = std.ArrayList(u8).init(allocator); 90 var w_buf = std.array_list.Managed(u8).init(allocator);
96 defer w_buf.deinit(); 91 defer w_buf.deinit();
97 92
98 var w_fields = mem.splitScalar(u8, field, ' '); 93 var w_fields = mem.splitScalar(u8, field, ' ');
@@ -111,33 +106,34 @@ test "Unicode normalization tests" {
111 continue; 106 continue;
112 } 107 }
113 } 108 }
109 } else |err| switch (err) {
110 error.EndOfStream => {},
111 else => {
112 return err;
113 },
114 } 114 }
115} 115}
116 116
117test "Segmentation GraphemeIterator" { 117test "Segmentation GraphemeIterator" {
118 const allocator = std.testing.allocator; 118 const allocator = std.testing.allocator;
119 var file = try std.fs.cwd().openFile("data/unicode/auxiliary/GraphemeBreakTest.txt", .{});
120 defer file.close();
121 var buf_reader = std.io.bufferedReader(file.reader());
122 var input_stream = buf_reader.reader();
123 119
120 var reader = std.io.Reader.fixed(@embedFile("GraphemeBreakTest.txt"));
124 const graph = try Graphemes.init(allocator); 121 const graph = try Graphemes.init(allocator);
125 defer graph.deinit(allocator); 122 defer graph.deinit(allocator);
126 123
127 var buf: [4096]u8 = undefined; 124 var line_iter: IterRead = .{ .read = &reader };
128 var line_iter: IterRead = .{ .read = &input_stream };
129 125
130 while (try line_iter.next(&buf)) |raw| { 126 while (line_iter.next()) |raw| {
131 // Clean up. 127 // Clean up.
132 var line = std.mem.trimLeft(u8, raw, "÷ "); 128 var line = std.mem.trimLeft(u8, raw, "÷ ");
133 if (std.mem.indexOf(u8, line, " ÷\t")) |final| { 129 if (std.mem.indexOf(u8, line, " ÷\t")) |final| {
134 line = line[0..final]; 130 line = line[0..final];
135 } 131 }
136 // Iterate over fields. 132 // Iterate over fields.
137 var want = std.ArrayList(Grapheme).init(allocator); 133 var want = std.array_list.Managed(Grapheme).init(allocator);
138 defer want.deinit(); 134 defer want.deinit();
139 135
140 var all_bytes = std.ArrayList(u8).init(allocator); 136 var all_bytes = std.array_list.Managed(u8).init(allocator);
141 defer all_bytes.deinit(); 137 defer all_bytes.deinit();
142 138
143 var graphemes = std.mem.splitSequence(u8, line, " ÷ "); 139 var graphemes = std.mem.splitSequence(u8, line, " ÷ ");
@@ -250,33 +246,33 @@ test "Segmentation GraphemeIterator" {
250 } 246 }
251 } 247 }
252 } 248 }
249 } else |err| switch (err) {
250 error.EndOfStream => {},
251 else => {
252 return err;
253 },
253 } 254 }
254} 255}
255 256
256test "Segmentation Word Iterator" { 257test "Segmentation Word Iterator" {
257 const allocator = std.testing.allocator; 258 const allocator = std.testing.allocator;
258 var file = try std.fs.cwd().openFile("data/unicode/auxiliary/WordBreakTest.txt", .{}); 259 var reader = std.io.Reader.fixed(@embedFile("WordBreakTest.txt"));
259 defer file.close();
260 var buf_reader = std.io.bufferedReader(file.reader());
261 var input_stream = buf_reader.reader();
262
263 const wb = try Words.init(allocator); 260 const wb = try Words.init(allocator);
264 defer wb.deinit(allocator); 261 defer wb.deinit(allocator);
265 262
266 var buf: [4096]u8 = undefined; 263 var line_iter: IterRead = .{ .read = &reader };
267 var line_iter: IterRead = .{ .read = &input_stream };
268 264
269 while (try line_iter.next(&buf)) |raw| { 265 while (line_iter.next()) |raw| {
270 // Clean up. 266 // Clean up.
271 var line = std.mem.trimLeft(u8, raw, "÷ "); 267 var line = std.mem.trimLeft(u8, raw, "÷ ");
272 if (std.mem.indexOf(u8, line, " ÷\t")) |final| { 268 if (std.mem.indexOf(u8, line, " ÷\t")) |final| {
273 line = line[0..final]; 269 line = line[0..final];
274 } 270 }
275 // Iterate over fields. 271 // Iterate over fields.
276 var want = std.ArrayList(Word).init(allocator); 272 var want = std.array_list.Managed(Word).init(allocator);
277 defer want.deinit(); 273 defer want.deinit();
278 274
279 var all_bytes = std.ArrayList(u8).init(allocator); 275 var all_bytes = std.array_list.Managed(u8).init(allocator);
280 defer all_bytes.deinit(); 276 defer all_bytes.deinit();
281 277
282 var words = std.mem.splitSequence(u8, line, " ÷ "); 278 var words = std.mem.splitSequence(u8, line, " ÷ ");
@@ -439,26 +435,27 @@ test "Segmentation Word Iterator" {
439 if (idx == 0) break; 435 if (idx == 0) break;
440 } 436 }
441 } 437 }
438 } else |err| switch (err) {
439 error.EndOfStream => {},
440 else => {
441 return err;
442 },
442 } 443 }
443} 444}
444 445
445const IterRead = struct { 446const IterRead = struct {
446 read: *Reader, 447 read: *io.Reader,
447 line: usize = 0, 448 line: usize = 0,
448 449
449 pub fn next(iter: *IterRead, buf: []u8) !?[]const u8 { 450 pub fn next(iter: *IterRead) anyerror![]const u8 {
450 defer iter.line += 1; 451 iter.line += 1;
451 const maybe_line = try iter.read.readUntilDelimiterOrEof(buf, '#'); 452 const took = try iter.read.takeDelimiterInclusive('\n');
452 if (maybe_line) |this_line| { 453 const this_line = std.mem.trimRight(u8, took, "\n");
453 try iter.read.skipUntilDelimiterOrEof('\n'); 454 if (this_line.len == 0 or this_line[0] == '@' or this_line[0] == '#') {
454 if (this_line.len == 0 or this_line[0] == '@') { 455 // comment, next line
455 // comment, next line 456 return iter.next();
456 return iter.next(buf);
457 } else {
458 return this_line;
459 }
460 } else { 457 } else {
461 return null; 458 return this_line;
462 } 459 }
463 } 460 }
464}; 461};
@@ -467,7 +464,6 @@ const std = @import("std");
467const fmt = std.fmt; 464const fmt = std.fmt;
468const fs = std.fs; 465const fs = std.fs;
469const io = std.io; 466const io = std.io;
470const Reader = io.BufferedReader(4096, fs.File.Reader).Reader;
471const heap = std.heap; 467const heap = std.heap;
472const mem = std.mem; 468const mem = std.mem;
473const debug = std.debug; 469const debug = std.debug;