diff options
| author | 2026-01-13 01:10:17 +0300 | |
|---|---|---|
| committer | 2026-01-13 01:10:17 +0300 | |
| commit | dfece51720a17fddd0520ce8bda1a3c05d110949 (patch) | |
| tree | d841e9321c1135ba5644b444ba7bb508ec4df025 /src | |
| parent | Moved part of the `strWidth` into its own `graphemeClusterWidth` function (diff) | |
| parent | Merge pull request 'Use width 2 when skin tone modifier detected' (#96) from ... (diff) | |
| download | zg-dfece51720a17fddd0520ce8bda1a3c05d110949.tar.gz zg-dfece51720a17fddd0520ce8bda1a3c05d110949.tar.xz zg-dfece51720a17fddd0520ce8bda1a3c05d110949.zip | |
Merge branch 'master' of https://codeberg.org/atman/zg into graphemeClusterWidth
Diffstat (limited to 'src')
| -rw-r--r-- | src/CanonData.zig | 4 | ||||
| -rw-r--r-- | src/CaseFolding.zig | 6 | ||||
| -rw-r--r-- | src/CombiningData.zig | 5 | ||||
| -rw-r--r-- | src/CompatData.zig | 5 | ||||
| -rw-r--r-- | src/DisplayWidth.zig | 31 | ||||
| -rw-r--r-- | src/GeneralCategories.zig | 4 | ||||
| -rw-r--r-- | src/Graphemes.zig | 4 | ||||
| -rw-r--r-- | src/HangulData.zig | 5 | ||||
| -rw-r--r-- | src/LetterCasing.zig | 14 | ||||
| -rw-r--r-- | src/NormPropsData.zig | 5 | ||||
| -rw-r--r-- | src/Normalize.zig | 10 | ||||
| -rw-r--r-- | src/Properties.zig | 10 | ||||
| -rw-r--r-- | src/Scripts.zig | 5 | ||||
| -rw-r--r-- | src/Words.zig | 4 | ||||
| -rw-r--r-- | src/unicode_tests.zig | 94 |
15 files changed, 92 insertions, 114 deletions
diff --git a/src/CanonData.zig b/src/CanonData.zig index 5d2332a..cf9dc8a 100644 --- a/src/CanonData.zig +++ b/src/CanonData.zig | |||
| @@ -7,11 +7,9 @@ cps: []u21 = undefined, | |||
| 7 | const CanonData = @This(); | 7 | const CanonData = @This(); |
| 8 | 8 | ||
| 9 | pub fn init(allocator: mem.Allocator) !CanonData { | 9 | pub fn init(allocator: mem.Allocator) !CanonData { |
| 10 | const decompressor = compress.flate.inflate.decompressor; | ||
| 11 | const in_bytes = @embedFile("canon"); | 10 | const in_bytes = @embedFile("canon"); |
| 12 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 11 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| 13 | var in_decomp = decompressor(.raw, in_fbs.reader()); | 12 | var reader = in_fbs.reader(); |
| 14 | var reader = in_decomp.reader(); | ||
| 15 | 13 | ||
| 16 | const endian = builtin.cpu.arch.endian(); | 14 | const endian = builtin.cpu.arch.endian(); |
| 17 | var cdata = CanonData{ | 15 | var cdata = CanonData{ |
diff --git a/src/CaseFolding.zig b/src/CaseFolding.zig index ff41b3e..df86b92 100644 --- a/src/CaseFolding.zig +++ b/src/CaseFolding.zig | |||
| @@ -48,11 +48,9 @@ fn setupImpl(casefold: *CaseFolding, allocator: Allocator) Allocator.Error!void | |||
| 48 | } | 48 | } |
| 49 | 49 | ||
| 50 | inline fn setupImplInner(casefold: *CaseFolding, allocator: Allocator) !void { | 50 | inline fn setupImplInner(casefold: *CaseFolding, allocator: Allocator) !void { |
| 51 | const decompressor = compress.flate.inflate.decompressor; | ||
| 52 | const in_bytes = @embedFile("fold"); | 51 | const in_bytes = @embedFile("fold"); |
| 53 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 52 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| 54 | var in_decomp = decompressor(.raw, in_fbs.reader()); | 53 | var reader = in_fbs.reader(); |
| 55 | var reader = in_decomp.reader(); | ||
| 56 | 54 | ||
| 57 | const endian = builtin.cpu.arch.endian(); | 55 | const endian = builtin.cpu.arch.endian(); |
| 58 | 56 | ||
| @@ -123,7 +121,7 @@ pub fn caseFoldAlloc( | |||
| 123 | allocator: Allocator, | 121 | allocator: Allocator, |
| 124 | cps: []const u21, | 122 | cps: []const u21, |
| 125 | ) Allocator.Error![]const u21 { | 123 | ) Allocator.Error![]const u21 { |
| 126 | var cfcps = std.ArrayList(u21).init(allocator); | 124 | var cfcps = std.array_list.Managed(u21).init(allocator); |
| 127 | defer cfcps.deinit(); | 125 | defer cfcps.deinit(); |
| 128 | var buf: [3]u21 = undefined; | 126 | var buf: [3]u21 = undefined; |
| 129 | 127 | ||
diff --git a/src/CombiningData.zig b/src/CombiningData.zig index fd64a3b..f58e0de 100644 --- a/src/CombiningData.zig +++ b/src/CombiningData.zig | |||
| @@ -6,11 +6,9 @@ s2: []u8 = undefined, | |||
| 6 | const CombiningData = @This(); | 6 | const CombiningData = @This(); |
| 7 | 7 | ||
| 8 | pub fn init(allocator: mem.Allocator) !CombiningData { | 8 | pub fn init(allocator: mem.Allocator) !CombiningData { |
| 9 | const decompressor = compress.flate.inflate.decompressor; | ||
| 10 | const in_bytes = @embedFile("ccc"); | 9 | const in_bytes = @embedFile("ccc"); |
| 11 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 10 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| 12 | var in_decomp = decompressor(.raw, in_fbs.reader()); | 11 | var reader = in_fbs.reader(); |
| 13 | var reader = in_decomp.reader(); | ||
| 14 | 12 | ||
| 15 | const endian = builtin.cpu.arch.endian(); | 13 | const endian = builtin.cpu.arch.endian(); |
| 16 | 14 | ||
| @@ -46,5 +44,4 @@ pub fn isStarter(cbdata: CombiningData, cp: u21) bool { | |||
| 46 | 44 | ||
| 47 | const std = @import("std"); | 45 | const std = @import("std"); |
| 48 | const builtin = @import("builtin"); | 46 | const builtin = @import("builtin"); |
| 49 | const compress = std.compress; | ||
| 50 | const mem = std.mem; | 47 | const mem = std.mem; |
diff --git a/src/CompatData.zig b/src/CompatData.zig index 794abca..40ecd12 100644 --- a/src/CompatData.zig +++ b/src/CompatData.zig | |||
| @@ -6,11 +6,9 @@ cps: []u21 = undefined, | |||
| 6 | const CompatData = @This(); | 6 | const CompatData = @This(); |
| 7 | 7 | ||
| 8 | pub fn init(allocator: mem.Allocator) !CompatData { | 8 | pub fn init(allocator: mem.Allocator) !CompatData { |
| 9 | const decompressor = compress.flate.inflate.decompressor; | ||
| 10 | const in_bytes = @embedFile("compat"); | 9 | const in_bytes = @embedFile("compat"); |
| 11 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 10 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| 12 | var in_decomp = decompressor(.raw, in_fbs.reader()); | 11 | var reader = in_fbs.reader(); |
| 13 | var reader = in_decomp.reader(); | ||
| 14 | 12 | ||
| 15 | const endian = builtin.cpu.arch.endian(); | 13 | const endian = builtin.cpu.arch.endian(); |
| 16 | var cpdata = CompatData{ | 14 | var cpdata = CompatData{ |
| @@ -55,6 +53,5 @@ pub fn toNfkd(cpdata: *const CompatData, cp: u21) []u21 { | |||
| 55 | 53 | ||
| 56 | const std = @import("std"); | 54 | const std = @import("std"); |
| 57 | const builtin = @import("builtin"); | 55 | const builtin = @import("builtin"); |
| 58 | const compress = std.compress; | ||
| 59 | const mem = std.mem; | 56 | const mem = std.mem; |
| 60 | const magic = @import("magic"); | 57 | const magic = @import("magic"); |
diff --git a/src/DisplayWidth.zig b/src/DisplayWidth.zig index 629087b..dee7ebd 100644 --- a/src/DisplayWidth.zig +++ b/src/DisplayWidth.zig | |||
| @@ -39,11 +39,9 @@ pub fn setupWithGraphemes(dw: *DisplayWidth, allocator: Allocator, graphemes: Gr | |||
| 39 | 39 | ||
| 40 | // Sets up the DisplayWidthData, leaving the GraphemeData undefined. | 40 | // Sets up the DisplayWidthData, leaving the GraphemeData undefined. |
| 41 | pub fn setup(dw: *DisplayWidth, allocator: Allocator) Allocator.Error!void { | 41 | pub fn setup(dw: *DisplayWidth, allocator: Allocator) Allocator.Error!void { |
| 42 | const decompressor = compress.flate.inflate.decompressor; | ||
| 43 | const in_bytes = @embedFile("dwp"); | 42 | const in_bytes = @embedFile("dwp"); |
| 44 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 43 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| 45 | var in_decomp = decompressor(.raw, in_fbs.reader()); | 44 | var reader = in_fbs.reader(); |
| 46 | var reader = in_decomp.reader(); | ||
| 47 | 45 | ||
| 48 | const endian = builtin.cpu.arch.endian(); | 46 | const endian = builtin.cpu.arch.endian(); |
| 49 | 47 | ||
| @@ -118,6 +116,8 @@ pub fn graphemeClusterWidth(dw: DisplayWidth, gc: []const u8) isize { | |||
| 118 | // emoji text sequence. | 116 | // emoji text sequence. |
| 119 | if (ncp.code == 0xFE0E) w = 1; | 117 | if (ncp.code == 0xFE0E) w = 1; |
| 120 | if (ncp.code == 0xFE0F) w = 2; | 118 | if (ncp.code == 0xFE0F) w = 2; |
| 119 | // Skin tones | ||
| 120 | if (0x1F3FB <= ncp.code and ncp.code <= 0x1F3FF) w = 2; | ||
| 121 | } | 121 | } |
| 122 | 122 | ||
| 123 | // Only adding width of first non-zero-width code point. | 123 | // Only adding width of first non-zero-width code point. |
| @@ -207,6 +207,9 @@ test "strWidth" { | |||
| 207 | try testing.expectEqual(@as(usize, 9), dw.strWidth("Ẓ̌á̲l͔̝̞̄̑͌g̖̘̘̔̔͢͞͝o̪̔T̢̙̫̈̍͞e̬͈͕͌̏͑x̺̍ṭ̓̓ͅ")); | 207 | try testing.expectEqual(@as(usize, 9), dw.strWidth("Ẓ̌á̲l͔̝̞̄̑͌g̖̘̘̔̔͢͞͝o̪̔T̢̙̫̈̍͞e̬͈͕͌̏͑x̺̍ṭ̓̓ͅ")); |
| 208 | try testing.expectEqual(@as(usize, 17), dw.strWidth("슬라바 우크라이나")); | 208 | try testing.expectEqual(@as(usize, 17), dw.strWidth("슬라바 우크라이나")); |
| 209 | try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{378}")); | 209 | try testing.expectEqual(@as(usize, 1), dw.strWidth("\u{378}")); |
| 210 | |||
| 211 | // https://codeberg.org/atman/zg/issues/82 | ||
| 212 | try testing.expectEqual(@as(usize, 12), dw.strWidth("✍️✍🏻✍🏼✍🏽✍🏾✍🏿")); | ||
| 210 | } | 213 | } |
| 211 | 214 | ||
| 212 | /// centers `str` in a new string of width `total_width` (in display cells) using `pad` as padding. | 215 | /// centers `str` in a new string of width `total_width` (in display cells) using `pad` as padding. |
| @@ -404,7 +407,7 @@ pub fn wrap( | |||
| 404 | columns: usize, | 407 | columns: usize, |
| 405 | threshold: usize, | 408 | threshold: usize, |
| 406 | ) ![]u8 { | 409 | ) ![]u8 { |
| 407 | var result = ArrayList(u8).init(allocator); | 410 | var result = std.array_list.Managed(u8).init(allocator); |
| 408 | defer result.deinit(); | 411 | defer result.deinit(); |
| 409 | 412 | ||
| 410 | var line_iter = mem.tokenizeAny(u8, str, "\r\n"); | 413 | var line_iter = mem.tokenizeAny(u8, str, "\r\n"); |
| @@ -426,8 +429,10 @@ pub fn wrap( | |||
| 426 | } | 429 | } |
| 427 | 430 | ||
| 428 | // Remove trailing space and newline. | 431 | // Remove trailing space and newline. |
| 429 | _ = result.pop(); | 432 | if (result.items[result.items.len - 1] == '\n') |
| 430 | _ = result.pop(); | 433 | _ = result.pop(); |
| 434 | if (result.items[result.items.len - 1] == ' ') | ||
| 435 | _ = result.pop(); | ||
| 431 | 436 | ||
| 432 | return try result.toOwnedSlice(); | 437 | return try result.toOwnedSlice(); |
| 433 | } | 438 | } |
| @@ -444,6 +449,18 @@ test "wrap" { | |||
| 444 | try testing.expectEqualStrings(want, got); | 449 | try testing.expectEqualStrings(want, got); |
| 445 | } | 450 | } |
| 446 | 451 | ||
| 452 | test "zg/74" { | ||
| 453 | var debug_alloc = std.heap.DebugAllocator(.{}).init; | ||
| 454 | const allocator = debug_alloc.allocator(); | ||
| 455 | defer _ = debug_alloc.deinit(); | ||
| 456 | const dw = try DisplayWidth.init(allocator); | ||
| 457 | defer dw.deinit(allocator); | ||
| 458 | const wrapped = try dw.wrap(allocator, "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam pellentesque pulvinar felis, sit amet commodo ligula feugiat sed. Sed quis malesuada elit, nec eleifend lectus. Sed tincidunt finibus aliquet. Praesent consectetur nibh libero, tempus imperdiet lorem congue eget.", 16, 1); | ||
| 459 | defer allocator.free(wrapped); | ||
| 460 | const expected_wrap = "Lorem ipsum dolor \nsit amet, consectetur \nadipiscing elit. \nNullam pellentesque \npulvinar felis, \nsit amet commodo \nligula feugiat \nsed. Sed quis malesuada \nelit, nec eleifend \nlectus. Sed tincidunt \nfinibus aliquet. \nPraesent consectetur \nnibh libero, tempus \nimperdiet lorem \ncongue eget."; | ||
| 461 | try std.testing.expectEqualStrings(expected_wrap, wrapped); | ||
| 462 | } | ||
| 463 | |||
| 447 | fn testAllocation(allocator: Allocator) !void { | 464 | fn testAllocation(allocator: Allocator) !void { |
| 448 | { | 465 | { |
| 449 | var dw = try DisplayWidth.init(allocator); | 466 | var dw = try DisplayWidth.init(allocator); |
| @@ -464,8 +481,6 @@ test "allocation test" { | |||
| 464 | const std = @import("std"); | 481 | const std = @import("std"); |
| 465 | const builtin = @import("builtin"); | 482 | const builtin = @import("builtin"); |
| 466 | const options = @import("options"); | 483 | const options = @import("options"); |
| 467 | const ArrayList = std.ArrayList; | ||
| 468 | const compress = std.compress; | ||
| 469 | const mem = std.mem; | 484 | const mem = std.mem; |
| 470 | const Allocator = mem.Allocator; | 485 | const Allocator = mem.Allocator; |
| 471 | const simd = std.simd; | 486 | const simd = std.simd; |
diff --git a/src/GeneralCategories.zig b/src/GeneralCategories.zig index 8c1b6a3..eee7e56 100644 --- a/src/GeneralCategories.zig +++ b/src/GeneralCategories.zig | |||
| @@ -47,11 +47,9 @@ pub fn init(allocator: Allocator) Allocator.Error!GeneralCategories { | |||
| 47 | } | 47 | } |
| 48 | 48 | ||
| 49 | pub fn setup(gencat: *GeneralCategories, allocator: Allocator) Allocator.Error!void { | 49 | pub fn setup(gencat: *GeneralCategories, allocator: Allocator) Allocator.Error!void { |
| 50 | const decompressor = compress.flate.inflate.decompressor; | ||
| 51 | const in_bytes = @embedFile("gencat"); | 50 | const in_bytes = @embedFile("gencat"); |
| 52 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 51 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| 53 | var in_decomp = decompressor(.raw, in_fbs.reader()); | 52 | var reader = in_fbs.reader(); |
| 54 | var reader = in_decomp.reader(); | ||
| 55 | 53 | ||
| 56 | const endian = builtin.cpu.arch.endian(); | 54 | const endian = builtin.cpu.arch.endian(); |
| 57 | 55 | ||
diff --git a/src/Graphemes.zig b/src/Graphemes.zig index f1c56ed..81d874c 100644 --- a/src/Graphemes.zig +++ b/src/Graphemes.zig | |||
| @@ -16,11 +16,9 @@ pub fn init(allocator: Allocator) Allocator.Error!Graphemes { | |||
| 16 | } | 16 | } |
| 17 | 17 | ||
| 18 | pub fn setup(graphemes: *Graphemes, allocator: Allocator) Allocator.Error!void { | 18 | pub fn setup(graphemes: *Graphemes, allocator: Allocator) Allocator.Error!void { |
| 19 | const decompressor = compress.flate.inflate.decompressor; | ||
| 20 | const in_bytes = @embedFile("gbp"); | 19 | const in_bytes = @embedFile("gbp"); |
| 21 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 20 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| 22 | var in_decomp = decompressor(.raw, in_fbs.reader()); | 21 | var reader = in_fbs.reader(); |
| 23 | var reader = in_decomp.reader(); | ||
| 24 | 22 | ||
| 25 | const endian = builtin.cpu.arch.endian(); | 23 | const endian = builtin.cpu.arch.endian(); |
| 26 | 24 | ||
diff --git a/src/HangulData.zig b/src/HangulData.zig index 8c5f3ad..cae8b97 100644 --- a/src/HangulData.zig +++ b/src/HangulData.zig | |||
| @@ -15,11 +15,9 @@ s2: []u3 = undefined, | |||
| 15 | const Hangul = @This(); | 15 | const Hangul = @This(); |
| 16 | 16 | ||
| 17 | pub fn init(allocator: mem.Allocator) !Hangul { | 17 | pub fn init(allocator: mem.Allocator) !Hangul { |
| 18 | const decompressor = compress.flate.inflate.decompressor; | ||
| 19 | const in_bytes = @embedFile("hangul"); | 18 | const in_bytes = @embedFile("hangul"); |
| 20 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 19 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| 21 | var in_decomp = decompressor(.raw, in_fbs.reader()); | 20 | var reader = in_fbs.reader(); |
| 22 | var reader = in_decomp.reader(); | ||
| 23 | 21 | ||
| 24 | const endian = builtin.cpu.arch.endian(); | 22 | const endian = builtin.cpu.arch.endian(); |
| 25 | var hangul = Hangul{}; | 23 | var hangul = Hangul{}; |
| @@ -49,6 +47,5 @@ pub fn syllable(hangul: *const Hangul, cp: u21) Syllable { | |||
| 49 | 47 | ||
| 50 | const std = @import("std"); | 48 | const std = @import("std"); |
| 51 | const builtin = @import("builtin"); | 49 | const builtin = @import("builtin"); |
| 52 | const compress = std.compress; | ||
| 53 | const mem = std.mem; | 50 | const mem = std.mem; |
| 54 | const testing = std.testing; | 51 | const testing = std.testing; |
diff --git a/src/LetterCasing.zig b/src/LetterCasing.zig index 11a3e96..33096fc 100644 --- a/src/LetterCasing.zig +++ b/src/LetterCasing.zig | |||
| @@ -22,7 +22,6 @@ pub fn setup(case: *LetterCasing, allocator: Allocator) Allocator.Error!void { | |||
| 22 | } | 22 | } |
| 23 | 23 | ||
| 24 | inline fn setupInner(self: *LetterCasing, allocator: mem.Allocator) !void { | 24 | inline fn setupInner(self: *LetterCasing, allocator: mem.Allocator) !void { |
| 25 | const decompressor = compress.flate.inflate.decompressor; | ||
| 26 | const endian = builtin.cpu.arch.endian(); | 25 | const endian = builtin.cpu.arch.endian(); |
| 27 | 26 | ||
| 28 | self.case_map = try allocator.alloc([2]u21, 0x110000); | 27 | self.case_map = try allocator.alloc([2]u21, 0x110000); |
| @@ -36,8 +35,7 @@ inline fn setupInner(self: *LetterCasing, allocator: mem.Allocator) !void { | |||
| 36 | // Uppercase | 35 | // Uppercase |
| 37 | const upper_bytes = @embedFile("upper"); | 36 | const upper_bytes = @embedFile("upper"); |
| 38 | var upper_fbs = std.io.fixedBufferStream(upper_bytes); | 37 | var upper_fbs = std.io.fixedBufferStream(upper_bytes); |
| 39 | var upper_decomp = decompressor(.raw, upper_fbs.reader()); | 38 | var upper_reader = upper_fbs.reader(); |
| 40 | var upper_reader = upper_decomp.reader(); | ||
| 41 | 39 | ||
| 42 | while (true) { | 40 | while (true) { |
| 43 | const cp = try upper_reader.readInt(i24, endian); | 41 | const cp = try upper_reader.readInt(i24, endian); |
| @@ -49,8 +47,7 @@ inline fn setupInner(self: *LetterCasing, allocator: mem.Allocator) !void { | |||
| 49 | // Lowercase | 47 | // Lowercase |
| 50 | const lower_bytes = @embedFile("lower"); | 48 | const lower_bytes = @embedFile("lower"); |
| 51 | var lower_fbs = std.io.fixedBufferStream(lower_bytes); | 49 | var lower_fbs = std.io.fixedBufferStream(lower_bytes); |
| 52 | var lower_decomp = decompressor(.raw, lower_fbs.reader()); | 50 | var lower_reader = lower_fbs.reader(); |
| 53 | var lower_reader = lower_decomp.reader(); | ||
| 54 | 51 | ||
| 55 | while (true) { | 52 | while (true) { |
| 56 | const cp = try lower_reader.readInt(i24, endian); | 53 | const cp = try lower_reader.readInt(i24, endian); |
| @@ -62,8 +59,7 @@ inline fn setupInner(self: *LetterCasing, allocator: mem.Allocator) !void { | |||
| 62 | // Case properties | 59 | // Case properties |
| 63 | const cp_bytes = @embedFile("case_prop"); | 60 | const cp_bytes = @embedFile("case_prop"); |
| 64 | var cp_fbs = std.io.fixedBufferStream(cp_bytes); | 61 | var cp_fbs = std.io.fixedBufferStream(cp_bytes); |
| 65 | var cp_decomp = decompressor(.raw, cp_fbs.reader()); | 62 | var cp_reader = cp_fbs.reader(); |
| 66 | var cp_reader = cp_decomp.reader(); | ||
| 67 | 63 | ||
| 68 | const stage_1_len: u16 = try cp_reader.readInt(u16, endian); | 64 | const stage_1_len: u16 = try cp_reader.readInt(u16, endian); |
| 69 | self.prop_s1 = try allocator.alloc(u16, stage_1_len); | 65 | self.prop_s1 = try allocator.alloc(u16, stage_1_len); |
| @@ -122,7 +118,7 @@ pub fn toUpperStr( | |||
| 122 | allocator: mem.Allocator, | 118 | allocator: mem.Allocator, |
| 123 | str: []const u8, | 119 | str: []const u8, |
| 124 | ) ![]u8 { | 120 | ) ![]u8 { |
| 125 | var bytes = std.ArrayList(u8).init(allocator); | 121 | var bytes = std.array_list.Managed(u8).init(allocator); |
| 126 | defer bytes.deinit(); | 122 | defer bytes.deinit(); |
| 127 | 123 | ||
| 128 | var iter = CodePointIterator{ .bytes = str }; | 124 | var iter = CodePointIterator{ .bytes = str }; |
| @@ -180,7 +176,7 @@ pub fn toLowerStr( | |||
| 180 | allocator: mem.Allocator, | 176 | allocator: mem.Allocator, |
| 181 | str: []const u8, | 177 | str: []const u8, |
| 182 | ) ![]u8 { | 178 | ) ![]u8 { |
| 183 | var bytes = std.ArrayList(u8).init(allocator); | 179 | var bytes = std.array_list.Managed(u8).init(allocator); |
| 184 | defer bytes.deinit(); | 180 | defer bytes.deinit(); |
| 185 | 181 | ||
| 186 | var iter = CodePointIterator{ .bytes = str }; | 182 | var iter = CodePointIterator{ .bytes = str }; |
diff --git a/src/NormPropsData.zig b/src/NormPropsData.zig index ca69569..7b53542 100644 --- a/src/NormPropsData.zig +++ b/src/NormPropsData.zig | |||
| @@ -6,11 +6,9 @@ s2: []u4 = undefined, | |||
| 6 | const NormProps = @This(); | 6 | const NormProps = @This(); |
| 7 | 7 | ||
| 8 | pub fn init(allocator: mem.Allocator) !NormProps { | 8 | pub fn init(allocator: mem.Allocator) !NormProps { |
| 9 | const decompressor = compress.flate.inflate.decompressor; | ||
| 10 | const in_bytes = @embedFile("normp"); | 9 | const in_bytes = @embedFile("normp"); |
| 11 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 10 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| 12 | var in_decomp = decompressor(.raw, in_fbs.reader()); | 11 | var reader = in_fbs.reader(); |
| 13 | var reader = in_decomp.reader(); | ||
| 14 | 12 | ||
| 15 | const endian = builtin.cpu.arch.endian(); | 13 | const endian = builtin.cpu.arch.endian(); |
| 16 | var norms = NormProps{}; | 14 | var norms = NormProps{}; |
| @@ -50,6 +48,5 @@ pub fn isFcx(norms: *const NormProps, cp: u21) bool { | |||
| 50 | 48 | ||
| 51 | const std = @import("std"); | 49 | const std = @import("std"); |
| 52 | const builtin = @import("builtin"); | 50 | const builtin = @import("builtin"); |
| 53 | const compress = std.compress; | ||
| 54 | const mem = std.mem; | 51 | const mem = std.mem; |
| 55 | const testing = std.testing; | 52 | const testing = std.testing; |
diff --git a/src/Normalize.zig b/src/Normalize.zig index 989ec29..4a1bae8 100644 --- a/src/Normalize.zig +++ b/src/Normalize.zig | |||
| @@ -305,7 +305,7 @@ pub fn nfkd(self: Normalize, allocator: Allocator, str: []const u8) Allocator.Er | |||
| 305 | } | 305 | } |
| 306 | 306 | ||
| 307 | pub fn nfxdCodePoints(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allocator.Error![]u21 { | 307 | pub fn nfxdCodePoints(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allocator.Error![]u21 { |
| 308 | var dcp_list = std.ArrayList(u21).init(allocator); | 308 | var dcp_list = std.array_list.Managed(u21).init(allocator); |
| 309 | defer dcp_list.deinit(); | 309 | defer dcp_list.deinit(); |
| 310 | 310 | ||
| 311 | var cp_iter = CodePointIterator{ .bytes = str }; | 311 | var cp_iter = CodePointIterator{ .bytes = str }; |
| @@ -332,7 +332,7 @@ fn nfxd(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allo | |||
| 332 | const dcps = try self.nfxdCodePoints(allocator, str, form); | 332 | const dcps = try self.nfxdCodePoints(allocator, str, form); |
| 333 | defer allocator.free(dcps); | 333 | defer allocator.free(dcps); |
| 334 | 334 | ||
| 335 | var dstr_list = std.ArrayList(u8).init(allocator); | 335 | var dstr_list = std.array_list.Managed(u8).init(allocator); |
| 336 | defer dstr_list.deinit(); | 336 | defer dstr_list.deinit(); |
| 337 | var buf: [4]u8 = undefined; | 337 | var buf: [4]u8 = undefined; |
| 338 | 338 | ||
| @@ -393,7 +393,7 @@ pub fn nfdCodePoints( | |||
| 393 | allocator: Allocator, | 393 | allocator: Allocator, |
| 394 | cps: []const u21, | 394 | cps: []const u21, |
| 395 | ) Allocator.Error![]u21 { | 395 | ) Allocator.Error![]u21 { |
| 396 | var dcp_list = std.ArrayList(u21).init(allocator); | 396 | var dcp_list = std.array_list.Managed(u21).init(allocator); |
| 397 | defer dcp_list.deinit(); | 397 | defer dcp_list.deinit(); |
| 398 | 398 | ||
| 399 | var dc_buf: [18]u21 = undefined; | 399 | var dc_buf: [18]u21 = undefined; |
| @@ -418,7 +418,7 @@ pub fn nfkdCodePoints( | |||
| 418 | allocator: Allocator, | 418 | allocator: Allocator, |
| 419 | cps: []const u21, | 419 | cps: []const u21, |
| 420 | ) Allocator.Error![]u21 { | 420 | ) Allocator.Error![]u21 { |
| 421 | var dcp_list = std.ArrayList(u21).init(allocator); | 421 | var dcp_list = std.array_list.Managed(u21).init(allocator); |
| 422 | defer dcp_list.deinit(); | 422 | defer dcp_list.deinit(); |
| 423 | 423 | ||
| 424 | var dc_buf: [18]u21 = undefined; | 424 | var dc_buf: [18]u21 = undefined; |
| @@ -560,7 +560,7 @@ fn nfxc(self: Normalize, allocator: Allocator, str: []const u8, form: Form) Allo | |||
| 560 | // If we have no deletions. the code point sequence | 560 | // If we have no deletions. the code point sequence |
| 561 | // has been fully composed. | 561 | // has been fully composed. |
| 562 | if (deleted == 0) { | 562 | if (deleted == 0) { |
| 563 | var cstr_list = std.ArrayList(u8).init(allocator); | 563 | var cstr_list = std.array_list.Managed(u8).init(allocator); |
| 564 | defer cstr_list.deinit(); | 564 | defer cstr_list.deinit(); |
| 565 | var buf: [4]u8 = undefined; | 565 | var buf: [4]u8 = undefined; |
| 566 | 566 | ||
diff --git a/src/Properties.zig b/src/Properties.zig index 73602a0..432d176 100644 --- a/src/Properties.zig +++ b/src/Properties.zig | |||
| @@ -25,14 +25,12 @@ pub fn setup(props: *Properties, allocator: Allocator) Allocator.Error!void { | |||
| 25 | } | 25 | } |
| 26 | 26 | ||
| 27 | inline fn setupInner(props: *Properties, allocator: Allocator) !void { | 27 | inline fn setupInner(props: *Properties, allocator: Allocator) !void { |
| 28 | const decompressor = compress.flate.inflate.decompressor; | ||
| 29 | const endian = builtin.cpu.arch.endian(); | 28 | const endian = builtin.cpu.arch.endian(); |
| 30 | 29 | ||
| 31 | // Process DerivedCoreProperties.txt | 30 | // Process DerivedCoreProperties.txt |
| 32 | const core_bytes = @embedFile("core_props"); | 31 | const core_bytes = @embedFile("core_props"); |
| 33 | var core_fbs = std.io.fixedBufferStream(core_bytes); | 32 | var core_fbs = std.io.fixedBufferStream(core_bytes); |
| 34 | var core_decomp = decompressor(.raw, core_fbs.reader()); | 33 | var core_reader = core_fbs.reader(); |
| 35 | var core_reader = core_decomp.reader(); | ||
| 36 | 34 | ||
| 37 | const core_stage_1_len: u16 = try core_reader.readInt(u16, endian); | 35 | const core_stage_1_len: u16 = try core_reader.readInt(u16, endian); |
| 38 | props.core_s1 = try allocator.alloc(u16, core_stage_1_len); | 36 | props.core_s1 = try allocator.alloc(u16, core_stage_1_len); |
| @@ -47,8 +45,7 @@ inline fn setupInner(props: *Properties, allocator: Allocator) !void { | |||
| 47 | // Process PropList.txt | 45 | // Process PropList.txt |
| 48 | const props_bytes = @embedFile("props"); | 46 | const props_bytes = @embedFile("props"); |
| 49 | var props_fbs = std.io.fixedBufferStream(props_bytes); | 47 | var props_fbs = std.io.fixedBufferStream(props_bytes); |
| 50 | var props_decomp = decompressor(.raw, props_fbs.reader()); | 48 | var props_reader = props_fbs.reader(); |
| 51 | var props_reader = props_decomp.reader(); | ||
| 52 | 49 | ||
| 53 | const stage_1_len: u16 = try props_reader.readInt(u16, endian); | 50 | const stage_1_len: u16 = try props_reader.readInt(u16, endian); |
| 54 | props.props_s1 = try allocator.alloc(u16, stage_1_len); | 51 | props.props_s1 = try allocator.alloc(u16, stage_1_len); |
| @@ -63,8 +60,7 @@ inline fn setupInner(props: *Properties, allocator: Allocator) !void { | |||
| 63 | // Process DerivedNumericType.txt | 60 | // Process DerivedNumericType.txt |
| 64 | const num_bytes = @embedFile("numeric"); | 61 | const num_bytes = @embedFile("numeric"); |
| 65 | var num_fbs = std.io.fixedBufferStream(num_bytes); | 62 | var num_fbs = std.io.fixedBufferStream(num_bytes); |
| 66 | var num_decomp = decompressor(.raw, num_fbs.reader()); | 63 | var num_reader = num_fbs.reader(); |
| 67 | var num_reader = num_decomp.reader(); | ||
| 68 | 64 | ||
| 69 | const num_stage_1_len: u16 = try num_reader.readInt(u16, endian); | 65 | const num_stage_1_len: u16 = try num_reader.readInt(u16, endian); |
| 70 | props.num_s1 = try allocator.alloc(u16, num_stage_1_len); | 66 | props.num_s1 = try allocator.alloc(u16, num_stage_1_len); |
diff --git a/src/Scripts.zig b/src/Scripts.zig index 3bc90bc..719b01f 100644 --- a/src/Scripts.zig +++ b/src/Scripts.zig | |||
| @@ -196,11 +196,9 @@ pub fn setup(scripts: *Scripts, allocator: Allocator) Allocator.Error!void { | |||
| 196 | } | 196 | } |
| 197 | 197 | ||
| 198 | inline fn setupInner(scripts: *Scripts, allocator: mem.Allocator) !void { | 198 | inline fn setupInner(scripts: *Scripts, allocator: mem.Allocator) !void { |
| 199 | const decompressor = compress.flate.inflate.decompressor; | ||
| 200 | const in_bytes = @embedFile("scripts"); | 199 | const in_bytes = @embedFile("scripts"); |
| 201 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 200 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| 202 | var in_decomp = decompressor(.raw, in_fbs.reader()); | 201 | var reader = in_fbs.reader(); |
| 203 | var reader = in_decomp.reader(); | ||
| 204 | 202 | ||
| 205 | const endian = builtin.cpu.arch.endian(); | 203 | const endian = builtin.cpu.arch.endian(); |
| 206 | 204 | ||
| @@ -250,7 +248,6 @@ test "Allocation failure" { | |||
| 250 | 248 | ||
| 251 | const std = @import("std"); | 249 | const std = @import("std"); |
| 252 | const builtin = @import("builtin"); | 250 | const builtin = @import("builtin"); |
| 253 | const compress = std.compress; | ||
| 254 | const mem = std.mem; | 251 | const mem = std.mem; |
| 255 | const Allocator = mem.Allocator; | 252 | const Allocator = mem.Allocator; |
| 256 | const testing = std.testing; | 253 | const testing = std.testing; |
diff --git a/src/Words.zig b/src/Words.zig index 617c34d..ce3203f 100644 --- a/src/Words.zig +++ b/src/Words.zig | |||
| @@ -605,11 +605,9 @@ const SneakIterator = struct { | |||
| 605 | }; | 605 | }; |
| 606 | 606 | ||
| 607 | inline fn setupImpl(wb: *Words, allocator: Allocator) !void { | 607 | inline fn setupImpl(wb: *Words, allocator: Allocator) !void { |
| 608 | const decompressor = compress.flate.inflate.decompressor; | ||
| 609 | const in_bytes = @embedFile("wbp"); | 608 | const in_bytes = @embedFile("wbp"); |
| 610 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 609 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| 611 | var in_decomp = decompressor(.raw, in_fbs.reader()); | 610 | var reader = in_fbs.reader(); |
| 612 | var reader = in_decomp.reader(); | ||
| 613 | 611 | ||
| 614 | const endian = builtin.cpu.arch.endian(); | 612 | const endian = builtin.cpu.arch.endian(); |
| 615 | 613 | ||
diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig index ae177a9..e2a5a96 100644 --- a/src/unicode_tests.zig +++ b/src/unicode_tests.zig | |||
| @@ -3,35 +3,30 @@ const dbg_print = false; | |||
| 3 | test "Unicode normalization tests" { | 3 | test "Unicode normalization tests" { |
| 4 | var arena = heap.ArenaAllocator.init(testing.allocator); | 4 | var arena = heap.ArenaAllocator.init(testing.allocator); |
| 5 | defer arena.deinit(); | 5 | defer arena.deinit(); |
| 6 | var allocator = arena.allocator(); | 6 | const allocator = arena.allocator(); |
| 7 | 7 | ||
| 8 | const n = try Normalize.init(allocator); | 8 | const n = try Normalize.init(allocator); |
| 9 | defer n.deinit(allocator); | 9 | defer n.deinit(allocator); |
| 10 | 10 | ||
| 11 | var file = try fs.cwd().openFile("data/unicode/NormalizationTest.txt", .{}); | 11 | var reader = std.io.Reader.fixed(@embedFile("NormalizationTest.txt")); |
| 12 | defer file.close(); | ||
| 13 | var buf_reader = io.bufferedReader(file.reader()); | ||
| 14 | var input_stream = buf_reader.reader(); | ||
| 15 | |||
| 16 | var buf: [4096]u8 = undefined; | ||
| 17 | var cp_buf: [4]u8 = undefined; | 12 | var cp_buf: [4]u8 = undefined; |
| 18 | 13 | ||
| 19 | var line_iter: IterRead = .{ .read = &input_stream }; | 14 | var line_iter: IterRead = .{ .read = &reader }; |
| 20 | 15 | ||
| 21 | while (try line_iter.next(&buf)) |line| { | 16 | while (line_iter.next()) |line| { |
| 22 | // Iterate over fields. | 17 | // Iterate over fields. |
| 23 | var fields = mem.splitScalar(u8, line, ';'); | 18 | var fields = mem.splitScalar(u8, line, ';'); |
| 24 | var field_index: usize = 0; | 19 | var field_index: usize = 0; |
| 25 | var input: []u8 = undefined; | 20 | var input: []u8 = undefined; |
| 26 | defer allocator.free(input); | 21 | if (dbg_print) std.debug.print("Line: {s}\n", .{line}); |
| 27 | |||
| 28 | while (fields.next()) |field| : (field_index += 1) { | 22 | while (fields.next()) |field| : (field_index += 1) { |
| 29 | if (field_index == 0) { | 23 | if (field_index == 0) { |
| 30 | var i_buf = std.ArrayList(u8).init(allocator); | 24 | var i_buf = std.array_list.Managed(u8).init(allocator); |
| 31 | defer i_buf.deinit(); | 25 | defer i_buf.deinit(); |
| 32 | 26 | ||
| 33 | var i_fields = mem.splitScalar(u8, field, ' '); | 27 | var i_fields = mem.splitScalar(u8, field, ' '); |
| 34 | while (i_fields.next()) |s| { | 28 | while (i_fields.next()) |s| { |
| 29 | if (dbg_print) std.debug.print("Debug: {s}\n", .{s}); | ||
| 35 | const icp = try fmt.parseInt(u21, s, 16); | 30 | const icp = try fmt.parseInt(u21, s, 16); |
| 36 | const len = try unicode.utf8Encode(icp, &cp_buf); | 31 | const len = try unicode.utf8Encode(icp, &cp_buf); |
| 37 | try i_buf.appendSlice(cp_buf[0..len]); | 32 | try i_buf.appendSlice(cp_buf[0..len]); |
| @@ -41,7 +36,7 @@ test "Unicode normalization tests" { | |||
| 41 | } else if (field_index == 1) { | 36 | } else if (field_index == 1) { |
| 42 | if (dbg_print) debug.print("\n*** {s} ***\n", .{line}); | 37 | if (dbg_print) debug.print("\n*** {s} ***\n", .{line}); |
| 43 | // NFC, time to test. | 38 | // NFC, time to test. |
| 44 | var w_buf = std.ArrayList(u8).init(allocator); | 39 | var w_buf = std.array_list.Managed(u8).init(allocator); |
| 45 | defer w_buf.deinit(); | 40 | defer w_buf.deinit(); |
| 46 | 41 | ||
| 47 | var w_fields = mem.splitScalar(u8, field, ' '); | 42 | var w_fields = mem.splitScalar(u8, field, ' '); |
| @@ -58,7 +53,7 @@ test "Unicode normalization tests" { | |||
| 58 | try testing.expectEqualStrings(want, got.slice); | 53 | try testing.expectEqualStrings(want, got.slice); |
| 59 | } else if (field_index == 2) { | 54 | } else if (field_index == 2) { |
| 60 | // NFD, time to test. | 55 | // NFD, time to test. |
| 61 | var w_buf = std.ArrayList(u8).init(allocator); | 56 | var w_buf = std.array_list.Managed(u8).init(allocator); |
| 62 | defer w_buf.deinit(); | 57 | defer w_buf.deinit(); |
| 63 | 58 | ||
| 64 | var w_fields = mem.splitScalar(u8, field, ' '); | 59 | var w_fields = mem.splitScalar(u8, field, ' '); |
| @@ -75,7 +70,7 @@ test "Unicode normalization tests" { | |||
| 75 | try testing.expectEqualStrings(want, got.slice); | 70 | try testing.expectEqualStrings(want, got.slice); |
| 76 | } else if (field_index == 3) { | 71 | } else if (field_index == 3) { |
| 77 | // NFKC, time to test. | 72 | // NFKC, time to test. |
| 78 | var w_buf = std.ArrayList(u8).init(allocator); | 73 | var w_buf = std.array_list.Managed(u8).init(allocator); |
| 79 | defer w_buf.deinit(); | 74 | defer w_buf.deinit(); |
| 80 | 75 | ||
| 81 | var w_fields = mem.splitScalar(u8, field, ' '); | 76 | var w_fields = mem.splitScalar(u8, field, ' '); |
| @@ -92,7 +87,7 @@ test "Unicode normalization tests" { | |||
| 92 | try testing.expectEqualStrings(want, got.slice); | 87 | try testing.expectEqualStrings(want, got.slice); |
| 93 | } else if (field_index == 4) { | 88 | } else if (field_index == 4) { |
| 94 | // NFKD, time to test. | 89 | // NFKD, time to test. |
| 95 | var w_buf = std.ArrayList(u8).init(allocator); | 90 | var w_buf = std.array_list.Managed(u8).init(allocator); |
| 96 | defer w_buf.deinit(); | 91 | defer w_buf.deinit(); |
| 97 | 92 | ||
| 98 | var w_fields = mem.splitScalar(u8, field, ' '); | 93 | var w_fields = mem.splitScalar(u8, field, ' '); |
| @@ -111,33 +106,34 @@ test "Unicode normalization tests" { | |||
| 111 | continue; | 106 | continue; |
| 112 | } | 107 | } |
| 113 | } | 108 | } |
| 109 | } else |err| switch (err) { | ||
| 110 | error.EndOfStream => {}, | ||
| 111 | else => { | ||
| 112 | return err; | ||
| 113 | }, | ||
| 114 | } | 114 | } |
| 115 | } | 115 | } |
| 116 | 116 | ||
| 117 | test "Segmentation GraphemeIterator" { | 117 | test "Segmentation GraphemeIterator" { |
| 118 | const allocator = std.testing.allocator; | 118 | const allocator = std.testing.allocator; |
| 119 | var file = try std.fs.cwd().openFile("data/unicode/auxiliary/GraphemeBreakTest.txt", .{}); | ||
| 120 | defer file.close(); | ||
| 121 | var buf_reader = std.io.bufferedReader(file.reader()); | ||
| 122 | var input_stream = buf_reader.reader(); | ||
| 123 | 119 | ||
| 120 | var reader = std.io.Reader.fixed(@embedFile("GraphemeBreakTest.txt")); | ||
| 124 | const graph = try Graphemes.init(allocator); | 121 | const graph = try Graphemes.init(allocator); |
| 125 | defer graph.deinit(allocator); | 122 | defer graph.deinit(allocator); |
| 126 | 123 | ||
| 127 | var buf: [4096]u8 = undefined; | 124 | var line_iter: IterRead = .{ .read = &reader }; |
| 128 | var line_iter: IterRead = .{ .read = &input_stream }; | ||
| 129 | 125 | ||
| 130 | while (try line_iter.next(&buf)) |raw| { | 126 | while (line_iter.next()) |raw| { |
| 131 | // Clean up. | 127 | // Clean up. |
| 132 | var line = std.mem.trimLeft(u8, raw, "÷ "); | 128 | var line = std.mem.trimLeft(u8, raw, "÷ "); |
| 133 | if (std.mem.indexOf(u8, line, " ÷\t")) |final| { | 129 | if (std.mem.indexOf(u8, line, " ÷\t")) |final| { |
| 134 | line = line[0..final]; | 130 | line = line[0..final]; |
| 135 | } | 131 | } |
| 136 | // Iterate over fields. | 132 | // Iterate over fields. |
| 137 | var want = std.ArrayList(Grapheme).init(allocator); | 133 | var want = std.array_list.Managed(Grapheme).init(allocator); |
| 138 | defer want.deinit(); | 134 | defer want.deinit(); |
| 139 | 135 | ||
| 140 | var all_bytes = std.ArrayList(u8).init(allocator); | 136 | var all_bytes = std.array_list.Managed(u8).init(allocator); |
| 141 | defer all_bytes.deinit(); | 137 | defer all_bytes.deinit(); |
| 142 | 138 | ||
| 143 | var graphemes = std.mem.splitSequence(u8, line, " ÷ "); | 139 | var graphemes = std.mem.splitSequence(u8, line, " ÷ "); |
| @@ -250,33 +246,33 @@ test "Segmentation GraphemeIterator" { | |||
| 250 | } | 246 | } |
| 251 | } | 247 | } |
| 252 | } | 248 | } |
| 249 | } else |err| switch (err) { | ||
| 250 | error.EndOfStream => {}, | ||
| 251 | else => { | ||
| 252 | return err; | ||
| 253 | }, | ||
| 253 | } | 254 | } |
| 254 | } | 255 | } |
| 255 | 256 | ||
| 256 | test "Segmentation Word Iterator" { | 257 | test "Segmentation Word Iterator" { |
| 257 | const allocator = std.testing.allocator; | 258 | const allocator = std.testing.allocator; |
| 258 | var file = try std.fs.cwd().openFile("data/unicode/auxiliary/WordBreakTest.txt", .{}); | 259 | var reader = std.io.Reader.fixed(@embedFile("WordBreakTest.txt")); |
| 259 | defer file.close(); | ||
| 260 | var buf_reader = std.io.bufferedReader(file.reader()); | ||
| 261 | var input_stream = buf_reader.reader(); | ||
| 262 | |||
| 263 | const wb = try Words.init(allocator); | 260 | const wb = try Words.init(allocator); |
| 264 | defer wb.deinit(allocator); | 261 | defer wb.deinit(allocator); |
| 265 | 262 | ||
| 266 | var buf: [4096]u8 = undefined; | 263 | var line_iter: IterRead = .{ .read = &reader }; |
| 267 | var line_iter: IterRead = .{ .read = &input_stream }; | ||
| 268 | 264 | ||
| 269 | while (try line_iter.next(&buf)) |raw| { | 265 | while (line_iter.next()) |raw| { |
| 270 | // Clean up. | 266 | // Clean up. |
| 271 | var line = std.mem.trimLeft(u8, raw, "÷ "); | 267 | var line = std.mem.trimLeft(u8, raw, "÷ "); |
| 272 | if (std.mem.indexOf(u8, line, " ÷\t")) |final| { | 268 | if (std.mem.indexOf(u8, line, " ÷\t")) |final| { |
| 273 | line = line[0..final]; | 269 | line = line[0..final]; |
| 274 | } | 270 | } |
| 275 | // Iterate over fields. | 271 | // Iterate over fields. |
| 276 | var want = std.ArrayList(Word).init(allocator); | 272 | var want = std.array_list.Managed(Word).init(allocator); |
| 277 | defer want.deinit(); | 273 | defer want.deinit(); |
| 278 | 274 | ||
| 279 | var all_bytes = std.ArrayList(u8).init(allocator); | 275 | var all_bytes = std.array_list.Managed(u8).init(allocator); |
| 280 | defer all_bytes.deinit(); | 276 | defer all_bytes.deinit(); |
| 281 | 277 | ||
| 282 | var words = std.mem.splitSequence(u8, line, " ÷ "); | 278 | var words = std.mem.splitSequence(u8, line, " ÷ "); |
| @@ -439,26 +435,27 @@ test "Segmentation Word Iterator" { | |||
| 439 | if (idx == 0) break; | 435 | if (idx == 0) break; |
| 440 | } | 436 | } |
| 441 | } | 437 | } |
| 438 | } else |err| switch (err) { | ||
| 439 | error.EndOfStream => {}, | ||
| 440 | else => { | ||
| 441 | return err; | ||
| 442 | }, | ||
| 442 | } | 443 | } |
| 443 | } | 444 | } |
| 444 | 445 | ||
| 445 | const IterRead = struct { | 446 | const IterRead = struct { |
| 446 | read: *Reader, | 447 | read: *io.Reader, |
| 447 | line: usize = 0, | 448 | line: usize = 0, |
| 448 | 449 | ||
| 449 | pub fn next(iter: *IterRead, buf: []u8) !?[]const u8 { | 450 | pub fn next(iter: *IterRead) anyerror![]const u8 { |
| 450 | defer iter.line += 1; | 451 | iter.line += 1; |
| 451 | const maybe_line = try iter.read.readUntilDelimiterOrEof(buf, '#'); | 452 | const took = try iter.read.takeDelimiterInclusive('\n'); |
| 452 | if (maybe_line) |this_line| { | 453 | const this_line = std.mem.trimRight(u8, took, "\n"); |
| 453 | try iter.read.skipUntilDelimiterOrEof('\n'); | 454 | if (this_line.len == 0 or this_line[0] == '@' or this_line[0] == '#') { |
| 454 | if (this_line.len == 0 or this_line[0] == '@') { | 455 | // comment, next line |
| 455 | // comment, next line | 456 | return iter.next(); |
| 456 | return iter.next(buf); | ||
| 457 | } else { | ||
| 458 | return this_line; | ||
| 459 | } | ||
| 460 | } else { | 457 | } else { |
| 461 | return null; | 458 | return this_line; |
| 462 | } | 459 | } |
| 463 | } | 460 | } |
| 464 | }; | 461 | }; |
| @@ -467,7 +464,6 @@ const std = @import("std"); | |||
| 467 | const fmt = std.fmt; | 464 | const fmt = std.fmt; |
| 468 | const fs = std.fs; | 465 | const fs = std.fs; |
| 469 | const io = std.io; | 466 | const io = std.io; |
| 470 | const Reader = io.BufferedReader(4096, fs.File.Reader).Reader; | ||
| 471 | const heap = std.heap; | 467 | const heap = std.heap; |
| 472 | const mem = std.mem; | 468 | const mem = std.mem; |
| 473 | const debug = std.debug; | 469 | const debug = std.debug; |