diff options
| author | 2025-05-16 12:03:33 -0400 | |
|---|---|---|
| committer | 2025-05-16 12:03:33 -0400 | |
| commit | 9042273383de60f36a7938f0f0b49102117eef85 (patch) | |
| tree | 38efa1dbceda1d0e332e53fdde8cb57ca8191ad4 | |
| parent | Merge Grapheme Segmentation Iterator Tests (diff) | |
| download | zg-9042273383de60f36a7938f0f0b49102117eef85.tar.gz zg-9042273383de60f36a7938f0f0b49102117eef85.tar.xz zg-9042273383de60f36a7938f0f0b49102117eef85.zip | |
Proofread
| -rw-r--r-- | NEWS.md | 6 | ||||
| -rw-r--r-- | src/WordBreak.zig | 11 |
2 files changed, 9 insertions, 8 deletions
| @@ -52,9 +52,9 @@ UTF-8 into codepoints. Concerningly, this interpreted overlong | |||
| 52 | sequences, which have been forbidden by Unicode for more than 20 years | 52 | sequences, which have been forbidden by Unicode for more than 20 years |
| 53 | due to the security risks involved. | 53 | due to the security risks involved. |
| 54 | 54 | ||
| 55 | This has been replaced with a DFA decoder based on the work of [Björn | 55 | This has been replaced with a DFA decoder based on the work of |
| 56 | Höhrmann][UTF], which has proven itself fast[^1] and reliable. This is | 56 | [Björn Höhrmann][UTF], which has proven itself fast[^1] and reliable. |
| 57 | a breaking change; sequences such as `"\xc0\xaf"` will no longer | 57 | This is a breaking change; sequences such as `"\xc0\xaf"` will no longer |
| 58 | produce the code `'/'`, nor will surrogates return their codepoint | 58 | produce the code `'/'`, nor will surrogates return their codepoint |
| 59 | value. | 59 | value. |
| 60 | 60 | ||
diff --git a/src/WordBreak.zig b/src/WordBreak.zig index 6ada7e1..6a532f5 100644 --- a/src/WordBreak.zig +++ b/src/WordBreak.zig | |||
| @@ -151,7 +151,8 @@ pub const Iterator = struct { | |||
| 151 | } | 151 | } |
| 152 | 152 | ||
| 153 | /// Returns a reverse iterator from the point this iterator is paused | 153 | /// Returns a reverse iterator from the point this iterator is paused |
| 154 | /// at. Usually, calling `prev()` will return the word just seen. | 154 | /// at. Usually, and always when using the API to create iterators, |
| 155 | /// calling `prev()` will return the word just seen. | ||
| 155 | pub fn reverseIterator(iter: *Iterator) ReverseIterator { | 156 | pub fn reverseIterator(iter: *Iterator) ReverseIterator { |
| 156 | var cp_it = iter.cp_iter.reverseIterator(); | 157 | var cp_it = iter.cp_iter.reverseIterator(); |
| 157 | if (iter.that) |_| | 158 | if (iter.that) |_| |
| @@ -333,7 +334,8 @@ pub const ReverseIterator = struct { | |||
| 333 | } | 334 | } |
| 334 | 335 | ||
| 335 | /// Return a forward iterator from where this iterator paused. Usually, | 336 | /// Return a forward iterator from where this iterator paused. Usually, |
| 336 | /// calling `next()` will return the word just seen. | 337 | /// and always when using the API to create iterators, calling `next()` |
| 338 | /// will return the word just seen. | ||
| 337 | pub fn forwardIterator(iter: *ReverseIterator) Iterator { | 339 | pub fn forwardIterator(iter: *ReverseIterator) Iterator { |
| 338 | var cp_it = iter.cp_iter.forwardIterator(); | 340 | var cp_it = iter.cp_iter.forwardIterator(); |
| 339 | if (iter.before) |_| | 341 | if (iter.before) |_| |
| @@ -508,9 +510,10 @@ pub const ReverseIterator = struct { | |||
| 508 | 510 | ||
| 509 | //| Implementation Details | 511 | //| Implementation Details |
| 510 | 512 | ||
| 511 | /// Initialize a ReverseIterator at the provided index. Used in wordAtIndex. | 513 | /// Initialize a ReverseIterator at the provided index. Used in `wordAtIndex`. |
| 512 | fn initAtIndex(wb: *const WordBreak, string: []const u8, index: usize) ReverseIterator { | 514 | fn initAtIndex(wb: *const WordBreak, string: []const u8, index: usize) ReverseIterator { |
| 513 | var idx: u32 = @intCast(index); | 515 | var idx: u32 = @intCast(index); |
| 516 | // Find the next lead byte: | ||
| 514 | while (idx < string.len and 0x80 <= string[idx] and string[idx] <= 0xBf) : (idx += 1) {} | 517 | while (idx < string.len and 0x80 <= string[idx] and string[idx] <= 0xBf) : (idx += 1) {} |
| 515 | if (idx == string.len) return wb.reverseIterator(string); | 518 | if (idx == string.len) return wb.reverseIterator(string); |
| 516 | var iter: ReverseIterator = undefined; | 519 | var iter: ReverseIterator = undefined; |
| @@ -630,8 +633,6 @@ test "Word Break Properties" { | |||
| 630 | try testing.expectEqual(.LF, wb.breakProperty('\n')); | 633 | try testing.expectEqual(.LF, wb.breakProperty('\n')); |
| 631 | try testing.expectEqual(.Hebrew_Letter, wb.breakProperty('ש')); | 634 | try testing.expectEqual(.Hebrew_Letter, wb.breakProperty('ש')); |
| 632 | try testing.expectEqual(.Katakana, wb.breakProperty('\u{30ff}')); | 635 | try testing.expectEqual(.Katakana, wb.breakProperty('\u{30ff}')); |
| 633 | var iter = wb.iterator("xxx"); | ||
| 634 | _ = iter.peek(); | ||
| 635 | } | 636 | } |
| 636 | 637 | ||
| 637 | test "ext_pict" { | 638 | test "ext_pict" { |