summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Sam Atman 2025-05-16 12:03:33 -0400
committer Sam Atman 2025-05-16 12:03:33 -0400
commit 9042273383de60f36a7938f0f0b49102117eef85 (patch)
tree 38efa1dbceda1d0e332e53fdde8cb57ca8191ad4
parent Merge Grapheme Segmentation Iterator Tests (diff)
download zg-9042273383de60f36a7938f0f0b49102117eef85.tar.gz
zg-9042273383de60f36a7938f0f0b49102117eef85.tar.xz
zg-9042273383de60f36a7938f0f0b49102117eef85.zip
Proofread
-rw-r--r-- NEWS.md 6
-rw-r--r-- src/WordBreak.zig 11
2 files changed, 9 insertions, 8 deletions
diff --git a/NEWS.md b/NEWS.md
index a432c2f..8131878 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -52,9 +52,9 @@ UTF-8 into codepoints. Concerningly, this interpreted overlong
52sequences, which has been forbidden by Unicode for more than 20 years 52sequences, which has been forbidden by Unicode for more than 20 years
53due to the security risks involved. 53due to the security risks involved.
54 54
55This has been replaced with a DFA decoder based on the work of [Björn 55This has been replaced with a DFA decoder based on the work of
56Höhrmann][UTF], which has proven itself fast[^1] and reliable. This is 56[Björn Höhrmann][UTF], which has proven itself fast[^1] and reliable.
57a breaking change; sequences such as `"\xc0\xaf"` will no longer 57This is a breaking change; sequences such as `"\xc0\xaf"` will no longer
58produce the code `'/'`, nor will surrogates return their codepoint 58produce the code `'/'`, nor will surrogates return their codepoint
59value. 59value.
60 60
diff --git a/src/WordBreak.zig b/src/WordBreak.zig
index 6ada7e1..6a532f5 100644
--- a/src/WordBreak.zig
+++ b/src/WordBreak.zig
@@ -151,7 +151,8 @@ pub const Iterator = struct {
151 } 151 }
152 152
153 /// Returns a reverse iterator from the point this iterator is paused 153 /// Returns a reverse iterator from the point this iterator is paused
154 /// at. Usually, calling `prev()` will return the word just seen. 154 /// at. Usually, and always when using the API to create iterators,
155 /// calling `prev()` will return the word just seen.
155 pub fn reverseIterator(iter: *Iterator) ReverseIterator { 156 pub fn reverseIterator(iter: *Iterator) ReverseIterator {
156 var cp_it = iter.cp_iter.reverseIterator(); 157 var cp_it = iter.cp_iter.reverseIterator();
157 if (iter.that) |_| 158 if (iter.that) |_|
@@ -333,7 +334,8 @@ pub const ReverseIterator = struct {
333 } 334 }
334 335
335 /// Return a forward iterator from where this iterator paused. Usually, 336 /// Return a forward iterator from where this iterator paused. Usually,
336 /// calling `next()` will return the word just seen. 337 /// and always when using the API to create iterators, calling `next()`
338 /// will return the word just seen.
337 pub fn forwardIterator(iter: *ReverseIterator) Iterator { 339 pub fn forwardIterator(iter: *ReverseIterator) Iterator {
338 var cp_it = iter.cp_iter.forwardIterator(); 340 var cp_it = iter.cp_iter.forwardIterator();
339 if (iter.before) |_| 341 if (iter.before) |_|
@@ -508,9 +510,10 @@ pub const ReverseIterator = struct {
508 510
509//| Implementation Details 511//| Implementation Details
510 512
511/// Initialize a ReverseIterator at the provided index. Used in wordAtIndex. 513/// Initialize a ReverseIterator at the provided index. Used in `wordAtIndex`.
512fn initAtIndex(wb: *const WordBreak, string: []const u8, index: usize) ReverseIterator { 514fn initAtIndex(wb: *const WordBreak, string: []const u8, index: usize) ReverseIterator {
513 var idx: u32 = @intCast(index); 515 var idx: u32 = @intCast(index);
516 // Find the next lead byte:
514 while (idx < string.len and 0x80 <= string[idx] and string[idx] <= 0xBf) : (idx += 1) {} 517 while (idx < string.len and 0x80 <= string[idx] and string[idx] <= 0xBf) : (idx += 1) {}
515 if (idx == string.len) return wb.reverseIterator(string); 518 if (idx == string.len) return wb.reverseIterator(string);
516 var iter: ReverseIterator = undefined; 519 var iter: ReverseIterator = undefined;
@@ -630,8 +633,6 @@ test "Word Break Properties" {
630 try testing.expectEqual(.LF, wb.breakProperty('\n')); 633 try testing.expectEqual(.LF, wb.breakProperty('\n'));
631 try testing.expectEqual(.Hebrew_Letter, wb.breakProperty('ש')); 634 try testing.expectEqual(.Hebrew_Letter, wb.breakProperty('ש'));
632 try testing.expectEqual(.Katakana, wb.breakProperty('\u{30ff}')); 635 try testing.expectEqual(.Katakana, wb.breakProperty('\u{30ff}'));
633 var iter = wb.iterator("xxx");
634 _ = iter.peek();
635} 636}
636 637
637test "ext_pict" { 638test "ext_pict" {