diff options
| -rw-r--r-- | README.md | 21 | ||||
| -rw-r--r-- | build.zig | 4 |
2 files changed, 23 insertions, 2 deletions
| @@ -519,3 +519,24 @@ test "Scripts" { | |||
| 519 | try expect(scripts.script('צ') == .Hebrew); | 519 | try expect(scripts.script('צ') == .Hebrew); |
| 520 | } | 520 | } |
| 521 | ``` | 521 | ``` |
| 522 | |||
| 523 | ## Limits | ||
| 524 | |||
| 525 | Iterators, and fragment types such as `CodePoint`, `Grapheme` and `Word`, use a | ||
| 526 | `u32` to store the offset into a string, and the length of the fragment | ||
| 527 | (`CodePoint` uses a `u3` for length, actually). | ||
| 528 | |||
| 529 | 4GiB is a lot of string. There are a few reasons to work with that much | ||
| 530 | string, log files primarily, but fewer to bring it all into memory at once, and | ||
| 531 | practically no reason at all to do anything to such a string without breaking | ||
| 532 | it into smaller pieces to work with. | ||
| 533 | |||
| 534 | Also, Zig compiles on 32-bit systems, where `usize` is 32 bits. Code running on | ||
| 535 | such systems has no choice but to handle slices in smaller pieces. In general, | ||
| 536 | if you want code to perform correctly when encountering multi-gigabyte | ||
| 537 | strings, you'll need to code for that, at a level one or two steps above that | ||
| 538 | in which you'll want to, for example, iterate some graphemes of that string. | ||
| 539 | |||
| 540 | That all said, `zg` modules can be passed the Boolean config option | ||
| 541 | `fat_offset`, which will make all of those data structures use a `u64` instead. | ||
| 542 | You don't actually want to do this. But you can. | ||
| @@ -14,7 +14,7 @@ pub fn build(b: *std.Build) void { | |||
| 14 | //| Options | 14 | //| Options |
| 15 | 15 | ||
| 16 | // Display width | 16 | // Display width |
| 17 | const cjk = b.option(bool, "cjk", "Ambiguous code points are wide (display width: 2).") orelse false; | 17 | const cjk = b.option(bool, "cjk", "Ambiguous code points are wide (display width: 2)") orelse false; |
| 18 | const dwp_options = b.addOptions(); | 18 | const dwp_options = b.addOptions(); |
| 19 | dwp_options.addOption(bool, "cjk", cjk); | 19 | dwp_options.addOption(bool, "cjk", cjk); |
| 20 | 20 | ||
| @@ -33,7 +33,7 @@ pub fn build(b: *std.Build) void { | |||
| 33 | dwp_options.addOption(?i4, "c1_width", c1_width); | 33 | dwp_options.addOption(?i4, "c1_width", c1_width); |
| 34 | 34 | ||
| 35 | //| Offset size | 35 | //| Offset size |
| 36 | const fat_offset = b.option(bool, "fat_offset", "Offsets in Iterators and data structures will be u64") orelse false; | 36 | const fat_offset = b.option(bool, "fat_offset", "Offsets in iterators and data structures will be u64") orelse false; |
| 37 | const size_config = b.addOptions(); | 37 | const size_config = b.addOptions(); |
| 38 | size_config.addOption(bool, "fat_offset", fat_offset); | 38 | size_config.addOption(bool, "fat_offset", fat_offset); |
| 39 | 39 | ||