diff options
| author | 2025-04-30 20:30:39 -0400 | |
|---|---|---|
| committer | 2025-04-30 20:30:39 -0400 | |
| commit | 10048b0d31d0db923ae39c6bbd67139ed6252f6f (patch) | |
| tree | 65df1666aacd102f59b4ac0844ccc7f7ddda91db | |
| parent | Setup variants for all allocating modules (diff) | |
| download | zg-10048b0d31d0db923ae39c6bbd67139ed6252f6f.tar.gz zg-10048b0d31d0db923ae39c6bbd67139ed6252f6f.tar.xz zg-10048b0d31d0db923ae39c6bbd67139ed6252f6f.zip | |
Allocation Failure Tests
These turned up an excessive amount of allocations in CanonData and
CompatData, which have been reduced to two through the somewhat
squirrely use of 'magic numbers'.
There are now allocation tests for every allocated structure in the
library, and they run to completion in a reasonable amount of time.
So, that's nice.
| -rw-r--r-- | NEWS.md | 115 | ||||
| -rw-r--r-- | src/CanonData.zig | 24 | ||||
| -rw-r--r-- | src/CaseFolding.zig | 15 | ||||
| -rw-r--r-- | src/CompatData.zig | 20 | ||||
| -rw-r--r-- | src/DisplayWidth.zig | 72 | ||||
| -rw-r--r-- | src/GeneralCategories.zig | 79 | ||||
| -rw-r--r-- | src/LetterCasing.zig | 9 | ||||
| -rw-r--r-- | src/Normalize.zig | 15 | ||||
| -rw-r--r-- | src/Properties.zig | 9 | ||||
| -rw-r--r-- | src/Scripts.zig | 9 | ||||
| -rw-r--r-- | src/magic_numbers.zig | 15 | ||||
| -rw-r--r-- | src/unicode_tests.zig | 2 |
12 files changed, 279 insertions, 105 deletions
| @@ -4,24 +4,56 @@ | |||
| 4 | 4 | ||
| 5 | This is the first minor point release since Sam Atman (me) took over | 5 | This is the first minor point release since Sam Atman (me) took over |
| 6 | maintenance of `zg` from the inimitable José Colon, aka | 6 | maintenance of `zg` from the inimitable José Colon, aka |
| 7 | @dude_the_builder. | 7 | @dude_the_builder. We're all grateful for everything he's done for |
| 8 | the Zig community. | ||
| 8 | 9 | ||
| 9 | As it's a fairly complex project, I'm adding a NEWS.md so that users | 10 | The changes are fairly large, and most user code will need to be updated. |
| 10 | have a place to check for changes. | 11 | The result is substantially streamlined and easier to use, and updating |
| 12 | will mainly take place around importing, creating, and deinitializing. | ||
| 11 | 13 | ||
| 12 | ### Data is Unmanaged | 14 | ### The Great Renaming |
| 13 | 15 | ||
| 14 | This is the biggest change. Prior to `v0.14`, all structs which need | 16 | The most obvious change is on the surface API: more than half of the modules |
| 15 | heap allocation no longer have a copy of their allocator. It was felt | 17 | have been renamed. There are no user-facing modules with `Data` in the name, |
| 16 | that this was redundant, especially when several such structures were | 18 | and some abbreviations have been spelled in full. |
| 17 | in use, and it reflects a general trend in the standard library toward | 19 | |
| 18 | fewer managed data structures. | 20 | ### No More Separation of Data and Functionality |
| 21 | |||
| 22 | It is no longer necessary to separately create, for example, a `GraphemeData` | ||
| 23 | structure, in order to use the functionality provided by the `grapheme` | ||
| 24 | module. | ||
| 25 | |||
| 26 | Instead there's just `Graphemes`, and the same for a couple of other modules | ||
| 27 | which worked the same way. This means that, in the cases where functionality | ||
| 28 | was provided by a wrapped pointer, it is now provided directly from the struct | ||
| 29 | with the necessary data. | ||
| 30 | |||
| 31 | This would make user structs larger in some cases, while eliminating a | ||
| 32 | pointer chase. If that isn't a desirable trade off for your code, | ||
| 33 | read on. | ||
| 34 | |||
| 35 | ### All Allocated Data is Unmanaged | ||
| 36 | |||
| 37 | As of `v0.14`, all structs which need heap allocation no longer | ||
| 38 | have a copy of their allocator. We felt that this was redundant, | ||
| 39 | especially when several such structures were in use, and it reflects | ||
| 40 | a general trend in the standard library toward fewer managed data | ||
| 41 | structures. | ||
| 19 | 42 | ||
| 20 | Getting up to speed is a matter of passing the allocator to `deinit`. | 43 | Getting up to speed is a matter of passing the allocator to `deinit`. |
| 21 | 44 | ||
| 22 | This change comes courtesy of [lch361](https://lch361.net), in his | 45 | This change comes courtesy of [lch361](https://lch361.net), in his |
| 23 | first contribution to the repo. Thanks Lich! | 46 | first contribution to the repo. Thanks Lich! |
| 24 | 47 | ||
| 48 | ### DisplayWidth and CaseFolding Can Share Data | ||
| 49 | |||
| 50 | Both of these modules use another module to get the job done, `Graphemes` | ||
| 51 | for `DisplayWidth`, and `Normalize` for `CaseFolding`. | ||
| 52 | |||
| 53 | It is now possible to initialize them with a borrowed copy of those | ||
| 54 | modules, to make it simpler to write code which also needs the base | ||
| 55 | modules. | ||
| 56 | |||
| 25 | ### Grapheme Iterator Creation | 57 | ### Grapheme Iterator Creation |
| 26 | 58 | ||
| 27 | This is a modest streamlining of how a grapheme iterator is created. | 59 | This is a modest streamlining of how a grapheme iterator is created. |
| @@ -37,10 +69,65 @@ var iter = grapheme.Iterator.init("🤘🏻some rad string! 🤘🏿", &gd); | |||
| 37 | Now: | 69 | Now: |
| 38 | 70 | ||
| 39 | ```zig | 71 | ```zig |
| 40 | const gd = try grapheme.GraphemeData.init(allocator); | 72 | const graphemes = try Graphemes.init(allocator); |
| 41 | defer gd.deinit(allocator); | 73 | defer graphemes.deinit(allocator); |
| 42 | var iter = gd.iterator("🤘🏻some rad string! 🤘🏿"); | 74 | var iter = graphemes.iterator("🤘🏻some rad string! 🤘🏿"); |
| 43 | ``` | 75 | ``` |
| 44 | 76 | ||
| 45 | You can still make an iterator with `grapheme.Iterator.init`, but the | 77 | It remains possible to use |
| 46 | second argument has to be `&gd.gd`. | 78 | |
| 79 | ```zig | ||
| 80 | var iter = Graphemes.Iterator.init("stri̵̢̡̡̡̨̧̡̨̡̡̡̨̫̗̗̱̳̼̖͚͉̩̬̬͚̟̣̮̬̙̖̗͇̮͓̻̫͍͎͉͎̹̩̗͖͈̙̻̭̝̭̼̙̯̪͚̙͉͎͎͖̥̹͈̫͍̹͓̘̙͎͖̝̦͎̤̼̹͕͈̪̙̪̯̯͙̝͈͕̬̪̗̭͎͖̟͚̦̣̘͙̞̮̹̙͚̼̤̟͉̭͔̩͍͔͈̯͎̘͎̭̥̖̜͙̖̖͍̼͙͎͚̦̮̹̞̺͍̳̖̹̼̲̠̩̰̳͂̌̈́̓̄͋̇̎͜͜͠ͅͅͅͅng", &graphemes); | ||
| 81 | ``` | ||
| 82 | |||
| 83 | If one were to prefer doing so. | ||
| 84 | |||
| 85 | ### Initialization vs. Setup | ||
| 86 | |||
| 87 | Every allocating module now has both an `init` function, which | ||
| 88 | returns the created struct, and a `setup` function. The latter | ||
| 89 | takes a mutable pointer, and an `Allocator`, returning | ||
| 90 | `Allocator.Error!void`. | ||
| 91 | |||
| 92 | So those who might prefer a single-pointer home for such modules | ||
| 93 | can allocate the struct on the heap with `allocator.create`, or | ||
| 94 | add a pointer field to some other struct, then use `setup` to | ||
| 95 | populate it. | ||
| 96 | |||
| 97 | In the process, the various spurious reader and decompression errors | ||
| 98 | have been turned `unreachable`, leaving only `error.OutOfMemory`. | ||
| 99 | Encountering any of the other errors would indicate an internal problem, | ||
| 100 | so we no longer make user code deal with that unlikely event. | ||
| 101 | |||
| 102 | ### New DisplayWidth options | ||
| 103 | |||
| 104 | A `DisplayWidth` can now be compiled to treat `c0` and `c1` control codes | ||
| 105 | as having a width. Canonically, terminals don't print them, so they would | ||
| 106 | have a width of 0. However, some applications (`vim` for example) need to | ||
| 107 | escape control codes to make them visible. Setting these options will let | ||
| 108 | `DisplayWidth` return the correct widths when this is done. | ||
| 109 | |||
| 110 | ### Unicode 16.0 | ||
| 111 | |||
| 112 | This updates `zg` to use the latest Unicode edition. This should be | ||
| 113 | the only change which will change behavior of user code, other than through | ||
| 114 | the use of the new `DisplayWidth` options. | ||
| 115 | |||
| 116 | ### Tests | ||
| 117 | |||
| 118 | It is now possible to run all the tests, not just the `unicode-test` subset. | ||
| 119 | Accordingly, that step is removed, and `zig build test` runs everything. | ||
| 120 | |||
| 121 | #### Allocations Tested | ||
| 122 | |||
| 123 | Every allocate-able now has a `checkAllAllocationFailures` test. This | ||
| 124 | process turned up two bugs. Also discovered were 8,663 allocations which | ||
| 125 | were reduced to two; these were also being individually freed on deinit. | ||
| 126 | So that's nice. | ||
| 127 | |||
| 128 | #### That's It! | ||
| 129 | |||
| 130 | I hope you find converting over `zg v0.13` code to be fairly painless and | ||
| 131 | straightforward. There should be no need to make changes of this magnitude | ||
| 132 | in the future. | ||
| 133 | |||
diff --git a/src/CanonData.zig b/src/CanonData.zig index d95a5be..5d2332a 100644 --- a/src/CanonData.zig +++ b/src/CanonData.zig | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | 2 | ||
| 3 | nfc: std.AutoHashMapUnmanaged([2]u21, u21), | 3 | nfc: std.AutoHashMapUnmanaged([2]u21, u21), |
| 4 | nfd: [][]u21 = undefined, | 4 | nfd: [][]u21 = undefined, |
| 5 | cps: []u21 = undefined, | ||
| 5 | 6 | ||
| 6 | const CanonData = @This(); | 7 | const CanonData = @This(); |
| 7 | 8 | ||
| @@ -17,23 +18,29 @@ pub fn init(allocator: mem.Allocator) !CanonData { | |||
| 17 | .nfc = .empty, | 18 | .nfc = .empty, |
| 18 | .nfd = try allocator.alloc([]u21, 0x110000), | 19 | .nfd = try allocator.alloc([]u21, 0x110000), |
| 19 | }; | 20 | }; |
| 20 | var _cp: u24 = undefined; | 21 | { |
| 22 | errdefer allocator.free(cdata.nfd); | ||
| 23 | cdata.cps = try allocator.alloc(u21, magic.canon_size); | ||
| 24 | } | ||
| 25 | |||
| 26 | var total_cp: u24 = undefined; | ||
| 21 | 27 | ||
| 22 | errdefer { | 28 | errdefer { |
| 23 | cdata.nfc.deinit(allocator); | 29 | cdata.nfc.deinit(allocator); |
| 24 | for (cdata.nfd[0.._cp]) |slice| allocator.free(slice); | 30 | allocator.free(cdata.cps); |
| 25 | allocator.free(cdata.nfd); | 31 | allocator.free(cdata.nfd); |
| 26 | } | 32 | } |
| 27 | 33 | ||
| 28 | @memset(cdata.nfd, &.{}); | 34 | @memset(cdata.nfd, &.{}); |
| 29 | 35 | ||
| 36 | var total_len: usize = 0; | ||
| 37 | |||
| 30 | while (true) { | 38 | while (true) { |
| 31 | const len: u8 = try reader.readInt(u8, endian); | 39 | const len: u8 = try reader.readInt(u8, endian); |
| 32 | if (len == 0) break; | 40 | if (len == 0) break; |
| 33 | const cp = try reader.readInt(u24, endian); | 41 | const cp = try reader.readInt(u24, endian); |
| 34 | _cp = cp; | 42 | total_cp = cp; |
| 35 | const nfd_cp = try allocator.alloc(u21, len - 1); | 43 | const nfd_cp = cdata.cps[total_len..][0 .. len - 1]; |
| 36 | errdefer allocator.free(nfd_cp); | ||
| 37 | for (0..len - 1) |i| { | 44 | for (0..len - 1) |i| { |
| 38 | nfd_cp[i] = @intCast(try reader.readInt(u24, endian)); | 45 | nfd_cp[i] = @intCast(try reader.readInt(u24, endian)); |
| 39 | } | 46 | } |
| @@ -41,14 +48,17 @@ pub fn init(allocator: mem.Allocator) !CanonData { | |||
| 41 | try cdata.nfc.put(allocator, nfd_cp[0..2].*, @intCast(cp)); | 48 | try cdata.nfc.put(allocator, nfd_cp[0..2].*, @intCast(cp)); |
| 42 | } | 49 | } |
| 43 | cdata.nfd[cp] = nfd_cp; | 50 | cdata.nfd[cp] = nfd_cp; |
| 51 | total_len += len - 1; | ||
| 44 | } | 52 | } |
| 45 | 53 | ||
| 54 | if (comptime magic.print) std.debug.print("CanonData magic number: {d}\n", .{total_len}); | ||
| 55 | |||
| 46 | return cdata; | 56 | return cdata; |
| 47 | } | 57 | } |
| 48 | 58 | ||
| 49 | pub fn deinit(cdata: *CanonData, allocator: mem.Allocator) void { | 59 | pub fn deinit(cdata: *CanonData, allocator: mem.Allocator) void { |
| 50 | cdata.nfc.deinit(allocator); | 60 | cdata.nfc.deinit(allocator); |
| 51 | for (cdata.nfd) |slice| allocator.free(slice); | 61 | allocator.free(cdata.cps); |
| 52 | allocator.free(cdata.nfd); | 62 | allocator.free(cdata.nfd); |
| 53 | } | 63 | } |
| 54 | 64 | ||
| @@ -66,3 +76,5 @@ const std = @import("std"); | |||
| 66 | const builtin = @import("builtin"); | 76 | const builtin = @import("builtin"); |
| 67 | const compress = std.compress; | 77 | const compress = std.compress; |
| 68 | const mem = std.mem; | 78 | const mem = std.mem; |
| 79 | const magic = @import("magic"); | ||
| 80 | const options = @import("options"); | ||
diff --git a/src/CaseFolding.zig b/src/CaseFolding.zig index 2e53bfa..f63b860 100644 --- a/src/CaseFolding.zig +++ b/src/CaseFolding.zig | |||
| @@ -310,14 +310,13 @@ fn testAllocations(allocator: Allocator) !void { | |||
| 310 | } | 310 | } |
| 311 | } | 311 | } |
| 312 | 312 | ||
| 313 | // test "Allocation Failures" { | 313 | test "Allocation Failures" { |
| 314 | // if (true) return error.SkipZigTest; // XXX: remove | 314 | try testing.checkAllAllocationFailures( |
| 315 | // try testing.checkAllAllocationFailures( | 315 | testing.allocator, |
| 316 | // testing.allocator, | 316 | testAllocations, |
| 317 | // testAllocations, | 317 | .{}, |
| 318 | // .{}, | 318 | ); |
| 319 | // ); | 319 | } |
| 320 | // } | ||
| 321 | 320 | ||
| 322 | const std = @import("std"); | 321 | const std = @import("std"); |
| 323 | const builtin = @import("builtin"); | 322 | const builtin = @import("builtin"); |
diff --git a/src/CompatData.zig b/src/CompatData.zig index d787103..794abca 100644 --- a/src/CompatData.zig +++ b/src/CompatData.zig | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | //! Compatibility Data | 1 | //! Compatibility Data |
| 2 | 2 | ||
| 3 | nfkd: [][]u21 = undefined, | 3 | nfkd: [][]u21 = undefined, |
| 4 | cps: []u21 = undefined, | ||
| 4 | 5 | ||
| 5 | const CompatData = @This(); | 6 | const CompatData = @This(); |
| 6 | 7 | ||
| @@ -15,27 +16,35 @@ pub fn init(allocator: mem.Allocator) !CompatData { | |||
| 15 | var cpdata = CompatData{ | 16 | var cpdata = CompatData{ |
| 16 | .nfkd = try allocator.alloc([]u21, 0x110000), | 17 | .nfkd = try allocator.alloc([]u21, 0x110000), |
| 17 | }; | 18 | }; |
| 19 | { | ||
| 20 | errdefer allocator.free(cpdata.nfkd); | ||
| 21 | cpdata.cps = try allocator.alloc(u21, magic.compat_size); | ||
| 22 | } | ||
| 18 | errdefer cpdata.deinit(allocator); | 23 | errdefer cpdata.deinit(allocator); |
| 19 | 24 | ||
| 20 | @memset(cpdata.nfkd, &.{}); | 25 | @memset(cpdata.nfkd, &.{}); |
| 21 | 26 | ||
| 27 | var total_len: usize = 0; | ||
| 28 | |||
| 22 | while (true) { | 29 | while (true) { |
| 23 | const len: u8 = try reader.readInt(u8, endian); | 30 | const len: u8 = try reader.readInt(u8, endian); |
| 24 | if (len == 0) break; | 31 | if (len == 0) break; |
| 25 | const cp = try reader.readInt(u24, endian); | 32 | const cp = try reader.readInt(u24, endian); |
| 26 | cpdata.nfkd[cp] = try allocator.alloc(u21, len - 1); | 33 | const nk_s = cpdata.cps[total_len..][0 .. len - 1]; |
| 27 | for (0..len - 1) |i| { | 34 | for (0..len - 1) |i| { |
| 28 | cpdata.nfkd[cp][i] = @intCast(try reader.readInt(u24, endian)); | 35 | nk_s[i] = @intCast(try reader.readInt(u24, endian)); |
| 29 | } | 36 | } |
| 37 | cpdata.nfkd[cp] = nk_s; | ||
| 38 | total_len += len - 1; | ||
| 30 | } | 39 | } |
| 31 | 40 | ||
| 41 | if (comptime magic.print) std.debug.print("CompatData magic number: {d}", .{total_len}); | ||
| 42 | |||
| 32 | return cpdata; | 43 | return cpdata; |
| 33 | } | 44 | } |
| 34 | 45 | ||
| 35 | pub fn deinit(cpdata: *const CompatData, allocator: mem.Allocator) void { | 46 | pub fn deinit(cpdata: *const CompatData, allocator: mem.Allocator) void { |
| 36 | for (cpdata.nfkd) |slice| { | 47 | allocator.free(cpdata.cps); |
| 37 | if (slice.len != 0) allocator.free(slice); | ||
| 38 | } | ||
| 39 | allocator.free(cpdata.nfkd); | 48 | allocator.free(cpdata.nfkd); |
| 40 | } | 49 | } |
| 41 | 50 | ||
| @@ -48,3 +57,4 @@ const std = @import("std"); | |||
| 48 | const builtin = @import("builtin"); | 57 | const builtin = @import("builtin"); |
| 49 | const compress = std.compress; | 58 | const compress = std.compress; |
| 50 | const mem = std.mem; | 59 | const mem = std.mem; |
| 60 | const magic = @import("magic"); | ||
diff --git a/src/DisplayWidth.zig b/src/DisplayWidth.zig index c0d6d96..4c63be4 100644 --- a/src/DisplayWidth.zig +++ b/src/DisplayWidth.zig | |||
| @@ -1,27 +1,18 @@ | |||
| 1 | const std = @import("std"); | 1 | //! Display Width module |
| 2 | const builtin = @import("builtin"); | 2 | //! |
| 3 | const options = @import("options"); | 3 | //! Answers questions about the printable width in monospaced fonts of the |
| 4 | const ArrayList = std.ArrayList; | 4 | //! string of interest. |
| 5 | const compress = std.compress; | ||
| 6 | const mem = std.mem; | ||
| 7 | const simd = std.simd; | ||
| 8 | const testing = std.testing; | ||
| 9 | |||
| 10 | const ascii = @import("ascii"); | ||
| 11 | const CodePointIterator = @import("code_point").Iterator; | ||
| 12 | pub const DisplayWidthData = @import("DisplayWidthData"); | ||
| 13 | 5 | ||
| 14 | const Graphemes = @import("Graphemes"); | 6 | graphemes: Graphemes = undefined, |
| 15 | |||
| 16 | graphemes: Graphemes, | ||
| 17 | s1: []u16 = undefined, | 7 | s1: []u16 = undefined, |
| 18 | s2: []i4 = undefined, | 8 | s2: []i4 = undefined, |
| 19 | owns_graphemes: bool, | 9 | owns_graphemes: bool = true, |
| 20 | 10 | ||
| 21 | const DisplayWidth = @This(); | 11 | const DisplayWidth = @This(); |
| 22 | 12 | ||
| 23 | pub fn init(allocator: mem.Allocator) mem.Allocator.Error!DisplayWidth { | 13 | pub fn init(allocator: Allocator) Allocator.Error!DisplayWidth { |
| 24 | var dw: DisplayWidth = try DisplayWidth.setup(allocator); | 14 | var dw = DisplayWidth{}; |
| 15 | try dw.setup(allocator); | ||
| 25 | errdefer { | 16 | errdefer { |
| 26 | allocator.free(dw.s1); | 17 | allocator.free(dw.s1); |
| 27 | allocator.free(dw.s2); | 18 | allocator.free(dw.s2); |
| @@ -32,15 +23,16 @@ pub fn init(allocator: mem.Allocator) mem.Allocator.Error!DisplayWidth { | |||
| 32 | return dw; | 23 | return dw; |
| 33 | } | 24 | } |
| 34 | 25 | ||
| 35 | pub fn initWithGraphemes(allocator: mem.Allocator, graphemes: Graphemes) mem.Allocator.Error!DisplayWidth { | 26 | pub fn initWithGraphemes(allocator: Allocator, graphemes: Graphemes) Allocator.Error!DisplayWidth { |
| 36 | var dw = try DisplayWidth.setup(allocator); | 27 | var dw = DisplayWidth{}; |
| 28 | try dw.setup(allocator); | ||
| 37 | dw.graphemes = graphemes; | 29 | dw.graphemes = graphemes; |
| 38 | dw.owns_graphemes = false; | 30 | dw.owns_graphemes = false; |
| 39 | return dw; | 31 | return dw; |
| 40 | } | 32 | } |
| 41 | 33 | ||
| 42 | // Sets up the DisplayWidthData, leaving the GraphemeData undefined. | 34 | // Sets up the DisplayWidthData, leaving the GraphemeData undefined. |
| 43 | fn setup(allocator: mem.Allocator) mem.Allocator.Error!DisplayWidth { | 35 | fn setup(dw: *DisplayWidth, allocator: Allocator) Allocator.Error!void { |
| 44 | const decompressor = compress.flate.inflate.decompressor; | 36 | const decompressor = compress.flate.inflate.decompressor; |
| 45 | const in_bytes = @embedFile("dwp"); | 37 | const in_bytes = @embedFile("dwp"); |
| 46 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 38 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| @@ -49,8 +41,6 @@ fn setup(allocator: mem.Allocator) mem.Allocator.Error!DisplayWidth { | |||
| 49 | 41 | ||
| 50 | const endian = builtin.cpu.arch.endian(); | 42 | const endian = builtin.cpu.arch.endian(); |
| 51 | 43 | ||
| 52 | var dw: DisplayWidth = undefined; | ||
| 53 | |||
| 54 | const stage_1_len: u16 = reader.readInt(u16, endian) catch unreachable; | 44 | const stage_1_len: u16 = reader.readInt(u16, endian) catch unreachable; |
| 55 | dw.s1 = try allocator.alloc(u16, stage_1_len); | 45 | dw.s1 = try allocator.alloc(u16, stage_1_len); |
| 56 | errdefer allocator.free(dw.s1); | 46 | errdefer allocator.free(dw.s1); |
| @@ -60,11 +50,9 @@ fn setup(allocator: mem.Allocator) mem.Allocator.Error!DisplayWidth { | |||
| 60 | dw.s2 = try allocator.alloc(i4, stage_2_len); | 50 | dw.s2 = try allocator.alloc(i4, stage_2_len); |
| 61 | errdefer allocator.free(dw.s2); | 51 | errdefer allocator.free(dw.s2); |
| 62 | for (0..stage_2_len) |i| dw.s2[i] = @intCast(reader.readInt(i8, endian) catch unreachable); | 52 | for (0..stage_2_len) |i| dw.s2[i] = @intCast(reader.readInt(i8, endian) catch unreachable); |
| 63 | |||
| 64 | return dw; | ||
| 65 | } | 53 | } |
| 66 | 54 | ||
| 67 | pub fn deinit(dw: *const DisplayWidth, allocator: mem.Allocator) void { | 55 | pub fn deinit(dw: *const DisplayWidth, allocator: Allocator) void { |
| 68 | allocator.free(dw.s1); | 56 | allocator.free(dw.s1); |
| 69 | allocator.free(dw.s2); | 57 | allocator.free(dw.s2); |
| 70 | if (dw.owns_graphemes) dw.graphemes.deinit(allocator); | 58 | if (dw.owns_graphemes) dw.graphemes.deinit(allocator); |
| @@ -445,3 +433,35 @@ test "wrap" { | |||
| 445 | const want = "The quick \nbrown fox \njumped \nover the \nlazy dog!"; | 433 | const want = "The quick \nbrown fox \njumped \nover the \nlazy dog!"; |
| 446 | try testing.expectEqualStrings(want, got); | 434 | try testing.expectEqualStrings(want, got); |
| 447 | } | 435 | } |
| 436 | |||
| 437 | fn testAllocation(allocator: Allocator) !void { | ||
| 438 | { | ||
| 439 | var dw = try DisplayWidth.init(allocator); | ||
| 440 | dw.deinit(allocator); | ||
| 441 | } | ||
| 442 | { | ||
| 443 | var graph = try Graphemes.init(allocator); | ||
| 444 | defer graph.deinit(allocator); | ||
| 445 | var dw = try DisplayWidth.initWithGraphemes(allocator, graph); | ||
| 446 | dw.deinit(allocator); | ||
| 447 | } | ||
| 448 | } | ||
| 449 | |||
| 450 | test "allocation test" { | ||
| 451 | try testing.checkAllAllocationFailures(testing.allocator, testAllocation, .{}); | ||
| 452 | } | ||
| 453 | |||
| 454 | const std = @import("std"); | ||
| 455 | const builtin = @import("builtin"); | ||
| 456 | const options = @import("options"); | ||
| 457 | const ArrayList = std.ArrayList; | ||
| 458 | const compress = std.compress; | ||
| 459 | const mem = std.mem; | ||
| 460 | const Allocator = mem.Allocator; | ||
| 461 | const simd = std.simd; | ||
| 462 | const testing = std.testing; | ||
| 463 | |||
| 464 | const ascii = @import("ascii"); | ||
| 465 | const CodePointIterator = @import("code_point").Iterator; | ||
| 466 | |||
| 467 | const Graphemes = @import("Graphemes"); | ||
diff --git a/src/GeneralCategories.zig b/src/GeneralCategories.zig index b7c82c0..3e76d82 100644 --- a/src/GeneralCategories.zig +++ b/src/GeneralCategories.zig | |||
| @@ -46,7 +46,16 @@ pub fn init(allocator: Allocator) Allocator.Error!GeneralCategories { | |||
| 46 | return gencat; | 46 | return gencat; |
| 47 | } | 47 | } |
| 48 | 48 | ||
| 49 | pub fn setup(self: *GeneralCategories, allocator: Allocator) Allocator.Error!void { | 49 | pub fn setup(gencat: *GeneralCategories, allocator: Allocator) Allocator.Error!void { |
| 50 | gencat.setupInner(allocator) catch |err| { | ||
| 51 | switch (err) { | ||
| 52 | error.OutOfMemory => |e| return e, | ||
| 53 | else => unreachable, | ||
| 54 | } | ||
| 55 | }; | ||
| 56 | } | ||
| 57 | |||
| 58 | inline fn setupInner(gencat: *GeneralCategories, allocator: Allocator) !void { | ||
| 50 | const decompressor = compress.flate.inflate.decompressor; | 59 | const decompressor = compress.flate.inflate.decompressor; |
| 51 | const in_bytes = @embedFile("gencat"); | 60 | const in_bytes = @embedFile("gencat"); |
| 52 | var in_fbs = std.io.fixedBufferStream(in_bytes); | 61 | var in_fbs = std.io.fixedBufferStream(in_bytes); |
| @@ -56,35 +65,35 @@ pub fn setup(self: *GeneralCategories, allocator: Allocator) Allocator.Error!voi | |||
| 56 | const endian = builtin.cpu.arch.endian(); | 65 | const endian = builtin.cpu.arch.endian(); |
| 57 | 66 | ||
| 58 | const s1_len: u16 = reader.readInt(u16, endian) catch unreachable; | 67 | const s1_len: u16 = reader.readInt(u16, endian) catch unreachable; |
| 59 | self.s1 = try allocator.alloc(u16, s1_len); | 68 | gencat.s1 = try allocator.alloc(u16, s1_len); |
| 60 | errdefer allocator.free(self.s1); | 69 | errdefer allocator.free(gencat.s1); |
| 61 | for (0..s1_len) |i| self.s1[i] = try reader.readInt(u16, endian); | 70 | for (0..s1_len) |i| gencat.s1[i] = try reader.readInt(u16, endian); |
| 62 | 71 | ||
| 63 | const s2_len: u16 = reader.readInt(u16, endian) catch unreachable; | 72 | const s2_len: u16 = reader.readInt(u16, endian) catch unreachable; |
| 64 | self.s2 = try allocator.alloc(u5, s2_len); | 73 | gencat.s2 = try allocator.alloc(u5, s2_len); |
| 65 | errdefer allocator.free(self.s2); | 74 | errdefer allocator.free(gencat.s2); |
| 66 | for (0..s2_len) |i| self.s2[i] = @intCast(reader.readInt(u8, endian) catch unreachable); | 75 | for (0..s2_len) |i| gencat.s2[i] = @intCast(reader.readInt(u8, endian) catch unreachable); |
| 67 | 76 | ||
| 68 | const s3_len: u16 = reader.readInt(u8, endian) catch unreachable; | 77 | const s3_len: u16 = reader.readInt(u8, endian) catch unreachable; |
| 69 | self.s3 = try allocator.alloc(u5, s3_len); | 78 | gencat.s3 = try allocator.alloc(u5, s3_len); |
| 70 | errdefer allocator.free(self.s3); | 79 | errdefer allocator.free(gencat.s3); |
| 71 | for (0..s3_len) |i| self.s3[i] = @intCast(reader.readInt(u8, endian) catch unreachable); | 80 | for (0..s3_len) |i| gencat.s3[i] = @intCast(reader.readInt(u8, endian) catch unreachable); |
| 72 | } | 81 | } |
| 73 | 82 | ||
| 74 | pub fn deinit(self: *const GeneralCategories, allocator: mem.Allocator) void { | 83 | pub fn deinit(gencat: *const GeneralCategories, allocator: mem.Allocator) void { |
| 75 | allocator.free(self.s1); | 84 | allocator.free(gencat.s1); |
| 76 | allocator.free(self.s2); | 85 | allocator.free(gencat.s2); |
| 77 | allocator.free(self.s3); | 86 | allocator.free(gencat.s3); |
| 78 | } | 87 | } |
| 79 | 88 | ||
| 80 | /// Lookup the General Category for `cp`. | 89 | /// Lookup the General Category for `cp`. |
| 81 | pub fn gc(self: GeneralCategories, cp: u21) Gc { | 90 | pub fn gc(gencat: GeneralCategories, cp: u21) Gc { |
| 82 | return @enumFromInt(self.s3[self.s2[self.s1[cp >> 8] + (cp & 0xff)]]); | 91 | return @enumFromInt(gencat.s3[gencat.s2[gencat.s1[cp >> 8] + (cp & 0xff)]]); |
| 83 | } | 92 | } |
| 84 | 93 | ||
| 85 | /// True if `cp` has an C general category. | 94 | /// True if `cp` has an C general category. |
| 86 | pub fn isControl(self: GeneralCategories, cp: u21) bool { | 95 | pub fn isControl(gencat: GeneralCategories, cp: u21) bool { |
| 87 | return switch (self.gc(cp)) { | 96 | return switch (gencat.gc(cp)) { |
| 88 | .Cc, | 97 | .Cc, |
| 89 | .Cf, | 98 | .Cf, |
| 90 | .Cn, | 99 | .Cn, |
| @@ -96,8 +105,8 @@ pub fn isControl(self: GeneralCategories, cp: u21) bool { | |||
| 96 | } | 105 | } |
| 97 | 106 | ||
| 98 | /// True if `cp` has an L general category. | 107 | /// True if `cp` has an L general category. |
| 99 | pub fn isLetter(self: GeneralCategories, cp: u21) bool { | 108 | pub fn isLetter(gencat: GeneralCategories, cp: u21) bool { |
| 100 | return switch (self.gc(cp)) { | 109 | return switch (gencat.gc(cp)) { |
| 101 | .Ll, | 110 | .Ll, |
| 102 | .Lm, | 111 | .Lm, |
| 103 | .Lo, | 112 | .Lo, |
| @@ -109,8 +118,8 @@ pub fn isLetter(self: GeneralCategories, cp: u21) bool { | |||
| 109 | } | 118 | } |
| 110 | 119 | ||
| 111 | /// True if `cp` has an M general category. | 120 | /// True if `cp` has an M general category. |
| 112 | pub fn isMark(self: GeneralCategories, cp: u21) bool { | 121 | pub fn isMark(gencat: GeneralCategories, cp: u21) bool { |
| 113 | return switch (self.gc(cp)) { | 122 | return switch (gencat.gc(cp)) { |
| 114 | .Mc, | 123 | .Mc, |
| 115 | .Me, | 124 | .Me, |
| 116 | .Mn, | 125 | .Mn, |
| @@ -120,8 +129,8 @@ pub fn isMark(self: GeneralCategories, cp: u21) bool { | |||
| 120 | } | 129 | } |
| 121 | 130 | ||
| 122 | /// True if `cp` has an N general category. | 131 | /// True if `cp` has an N general category. |
| 123 | pub fn isNumber(self: GeneralCategories, cp: u21) bool { | 132 | pub fn isNumber(gencat: GeneralCategories, cp: u21) bool { |
| 124 | return switch (self.gc(cp)) { | 133 | return switch (gencat.gc(cp)) { |
| 125 | .Nd, | 134 | .Nd, |
| 126 | .Nl, | 135 | .Nl, |
| 127 | .No, | 136 | .No, |
| @@ -131,8 +140,8 @@ pub fn isNumber(self: GeneralCategories, cp: u21) bool { | |||
| 131 | } | 140 | } |
| 132 | 141 | ||
| 133 | /// True if `cp` has an P general category. | 142 | /// True if `cp` has an P general category. |
| 134 | pub fn isPunctuation(self: GeneralCategories, cp: u21) bool { | 143 | pub fn isPunctuation(gencat: GeneralCategories, cp: u21) bool { |
| 135 | return switch (self.gc(cp)) { | 144 | return switch (gencat.gc(cp)) { |
| 136 | .Pc, | 145 | .Pc, |
| 137 | .Pd, | 146 | .Pd, |
| 138 | .Pe, | 147 | .Pe, |
| @@ -146,8 +155,8 @@ pub fn isPunctuation(self: GeneralCategories, cp: u21) bool { | |||
| 146 | } | 155 | } |
| 147 | 156 | ||
| 148 | /// True if `cp` has an S general category. | 157 | /// True if `cp` has an S general category. |
| 149 | pub fn isSymbol(self: GeneralCategories, cp: u21) bool { | 158 | pub fn isSymbol(gencat: GeneralCategories, cp: u21) bool { |
| 150 | return switch (self.gc(cp)) { | 159 | return switch (gencat.gc(cp)) { |
| 151 | .Sc, | 160 | .Sc, |
| 152 | .Sk, | 161 | .Sk, |
| 153 | .Sm, | 162 | .Sm, |
| @@ -158,8 +167,8 @@ pub fn isSymbol(self: GeneralCategories, cp: u21) bool { | |||
| 158 | } | 167 | } |
| 159 | 168 | ||
| 160 | /// True if `cp` has an Z general category. | 169 | /// True if `cp` has an Z general category. |
| 161 | pub fn isSeparator(self: GeneralCategories, cp: u21) bool { | 170 | pub fn isSeparator(gencat: GeneralCategories, cp: u21) bool { |
| 162 | return switch (self.gc(cp)) { | 171 | return switch (gencat.gc(cp)) { |
| 163 | .Zl, | 172 | .Zl, |
| 164 | .Zp, | 173 | .Zp, |
| 165 | .Zs, | 174 | .Zs, |
| @@ -168,8 +177,18 @@ pub fn isSeparator(self: GeneralCategories, cp: u21) bool { | |||
| 168 | }; | 177 | }; |
| 169 | } | 178 | } |
| 170 | 179 | ||
| 180 | fn testAllocator(allocator: Allocator) !void { | ||
| 181 | var gen_cat = try GeneralCategories.init(allocator); | ||
| 182 | gen_cat.deinit(allocator); | ||
| 183 | } | ||
| 184 | |||
| 185 | test "Allocation failure" { | ||
| 186 | try testing.checkAllAllocationFailures(testing.allocator, testAllocator, .{}); | ||
| 187 | } | ||
| 188 | |||
| 171 | const std = @import("std"); | 189 | const std = @import("std"); |
| 172 | const builtin = @import("builtin"); | 190 | const builtin = @import("builtin"); |
| 173 | const compress = std.compress; | 191 | const compress = std.compress; |
| 174 | const mem = std.mem; | 192 | const mem = std.mem; |
| 193 | const testing = std.testing; | ||
| 175 | const Allocator = mem.Allocator; | 194 | const Allocator = mem.Allocator; |
diff --git a/src/LetterCasing.zig b/src/LetterCasing.zig index a7260b8..11a3e96 100644 --- a/src/LetterCasing.zig +++ b/src/LetterCasing.zig | |||
| @@ -203,6 +203,15 @@ test "toLowerStr" { | |||
| 203 | try testing.expectEqualStrings("hello, world 2112!", lowered); | 203 | try testing.expectEqualStrings("hello, world 2112!", lowered); |
| 204 | } | 204 | } |
| 205 | 205 | ||
| 206 | fn testAllocator(allocator: Allocator) !void { | ||
| 207 | var prop = try LetterCasing.init(allocator); | ||
| 208 | prop.deinit(allocator); | ||
| 209 | } | ||
| 210 | |||
| 211 | test "Allocation failure" { | ||
| 212 | try testing.checkAllAllocationFailures(testing.allocator, testAllocator, .{}); | ||
| 213 | } | ||
| 214 | |||
| 206 | const std = @import("std"); | 215 | const std = @import("std"); |
| 207 | const builtin = @import("builtin"); | 216 | const builtin = @import("builtin"); |
| 208 | const compress = std.compress; | 217 | const compress = std.compress; |
diff --git a/src/Normalize.zig b/src/Normalize.zig index 1500b4c..989ec29 100644 --- a/src/Normalize.zig +++ b/src/Normalize.zig | |||
| @@ -657,21 +657,6 @@ test "isLatin1Only" { | |||
| 657 | try testing.expect(!isLatin1Only(not_latin1_only)); | 657 | try testing.expect(!isLatin1Only(not_latin1_only)); |
| 658 | } | 658 | } |
| 659 | 659 | ||
| 660 | // NOTE: These tests take way waaaaay too long to run, because | ||
| 661 | // the amount of allocations in a couple of the inflators is | ||
| 662 | // completely excessive and is also costing memory for metadata. | ||
| 663 | // I'm leaving this here for when I fix that. | ||
| 664 | // | ||
| 665 | // fn testAllocations(allocator: Allocator) !void { | ||
| 666 | // const norm = try Normalize.init(allocator); | ||
| 667 | // norm.deinit(allocator); | ||
| 668 | // } | ||
| 669 | // | ||
| 670 | // test "allocation failures" { | ||
| 671 | // if (true) return error.SkipZigTest; | ||
| 672 | // try testing.checkAllAllocationFailures(testing.allocator, testAllocations, .{}); | ||
| 673 | // } | ||
| 674 | |||
| 675 | const std = @import("std"); | 660 | const std = @import("std"); |
| 676 | const debug = std.debug; | 661 | const debug = std.debug; |
| 677 | const assert = debug.assert; | 662 | const assert = debug.assert; |
diff --git a/src/Properties.zig b/src/Properties.zig index f7e57ec..73602a0 100644 --- a/src/Properties.zig +++ b/src/Properties.zig | |||
| @@ -169,6 +169,15 @@ test "Props" { | |||
| 169 | try testing.expect(!self.isDecimal('g')); | 169 | try testing.expect(!self.isDecimal('g')); |
| 170 | } | 170 | } |
| 171 | 171 | ||
| 172 | fn testAllocator(allocator: Allocator) !void { | ||
| 173 | var prop = try Properties.init(allocator); | ||
| 174 | prop.deinit(allocator); | ||
| 175 | } | ||
| 176 | |||
| 177 | test "Allocation failure" { | ||
| 178 | try testing.checkAllAllocationFailures(testing.allocator, testAllocator, .{}); | ||
| 179 | } | ||
| 180 | |||
| 172 | const std = @import("std"); | 181 | const std = @import("std"); |
| 173 | const builtin = @import("builtin"); | 182 | const builtin = @import("builtin"); |
| 174 | const compress = std.compress; | 183 | const compress = std.compress; |
diff --git a/src/Scripts.zig b/src/Scripts.zig index f71a2b5..fd5fde9 100644 --- a/src/Scripts.zig +++ b/src/Scripts.zig | |||
| @@ -233,6 +233,15 @@ test "script" { | |||
| 233 | try testing.expectEqual(Script.Latin, self.script('A').?); | 233 | try testing.expectEqual(Script.Latin, self.script('A').?); |
| 234 | } | 234 | } |
| 235 | 235 | ||
| 236 | fn testAllocator(allocator: Allocator) !void { | ||
| 237 | var prop = try Scripts.init(allocator); | ||
| 238 | prop.deinit(allocator); | ||
| 239 | } | ||
| 240 | |||
| 241 | test "Allocation failure" { | ||
| 242 | try testing.checkAllAllocationFailures(testing.allocator, testAllocator, .{}); | ||
| 243 | } | ||
| 244 | |||
| 236 | const std = @import("std"); | 245 | const std = @import("std"); |
| 237 | const builtin = @import("builtin"); | 246 | const builtin = @import("builtin"); |
| 238 | const compress = std.compress; | 247 | const compress = std.compress; |
diff --git a/src/magic_numbers.zig b/src/magic_numbers.zig new file mode 100644 index 0000000..203bdfd --- /dev/null +++ b/src/magic_numbers.zig | |||
| @@ -0,0 +1,15 @@ | |||
| 1 | //! 'Magic' numbers for codegen sizing | ||
| 2 | //! | ||
| 3 | //! These need to be updated for each Unicode version. | ||
| 4 | |||
| 5 | // Whether to print the magic numbers | ||
| 6 | pub const print = false; | ||
| 7 | |||
| 8 | // Don't want to crash while printing magic... | ||
| 9 | const fudge = if (print) 1000 else 0; | ||
| 10 | |||
| 11 | // Number of codepoints in CanonData.zig | ||
| 12 | pub const canon_size: usize = 3127 + fudge; | ||
| 13 | |||
| 14 | // Number of codepoints in CompatData.zig | ||
| 15 | pub const compat_size: usize = 5612 + fudge; | ||
diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig index 8b9069a..1c4b888 100644 --- a/src/unicode_tests.zig +++ b/src/unicode_tests.zig | |||
| @@ -208,7 +208,7 @@ test "Segmentation GraphemeIterator" { | |||
| 208 | // std.debug.print("\nline {}: {s}\n", .{ line_no, all_bytes.items }); | 208 | // std.debug.print("\nline {}: {s}\n", .{ line_no, all_bytes.items }); |
| 209 | var iter = data.iterator(all_bytes.items); | 209 | var iter = data.iterator(all_bytes.items); |
| 210 | 210 | ||
| 211 | // Chaeck. | 211 | // Check. |
| 212 | for (want.items) |want_gc| { | 212 | for (want.items) |want_gc| { |
| 213 | const got_gc = (iter.next()).?; | 213 | const got_gc = (iter.next()).?; |
| 214 | try std.testing.expectEqualStrings( | 214 | try std.testing.expectEqualStrings( |