diff options
| author | 2025-05-14 12:59:30 -0400 | |
|---|---|---|
| committer | 2025-05-15 15:32:43 -0400 | |
| commit | b3847c8d7d0b73fabf66276a3b82c7533e5f930e (patch) | |
| tree | f53e1867ea5be24cfb49ccbdaf60c5a36f01de5c | |
| parent | Peek tests for word iterators (diff) | |
| download | zg-b3847c8d7d0b73fabf66276a3b82c7533e5f930e.tar.gz zg-b3847c8d7d0b73fabf66276a3b82c7533e5f930e.tar.xz zg-b3847c8d7d0b73fabf66276a3b82c7533e5f930e.zip | |
Add reversal functions for word iterators
While of only occasional use in real programs, one thing these are good
for is reliably retrieving the word at a given index, which turns out
to be... tricky is the best word.
| -rw-r--r-- | src/WordBreak.zig | 83 |
1 file changed, 81 insertions, 2 deletions
diff --git a/src/WordBreak.zig b/src/WordBreak.zig index f1322ff..0925b2f 100644 --- a/src/WordBreak.zig +++ b/src/WordBreak.zig | |||
| @@ -130,6 +130,22 @@ pub const Iterator = struct { | |||
| 130 | return iter.next(); | 130 | return iter.next(); |
| 131 | } | 131 | } |
| 132 | 132 | ||
| 133 | /// Return a reverse iterator from the point this iterator is paused | ||
| 134 | /// at. Usually, calling `prev()` will return the word just seen. | ||
| 135 | pub fn reverseIterator(iter: *Iterator) ReverseIterator { | ||
| 136 | var cp_it = iter.cp_iter.reverseIterator(); | ||
| 137 | if (iter.that) |_| | ||
| 138 | _ = cp_it.prev(); | ||
| 139 | if (iter.cp_iter.peek()) |_| | ||
| 140 | _ = cp_it.prev(); | ||
| 141 | return .{ | ||
| 142 | .wb = iter.wb, | ||
| 143 | .before = cp_it.prev(), | ||
| 144 | .after = iter.that, | ||
| 145 | .cp_iter = cp_it, | ||
| 146 | }; | ||
| 147 | } | ||
| 148 | |||
| 133 | /// Returns the next word segment. | 149 | /// Returns the next word segment. |
| 134 | pub fn next(iter: *Iterator) ?Word { | 150 | pub fn next(iter: *Iterator) ?Word { |
| 135 | iter.advance(); | 151 | iter.advance(); |
| @@ -249,6 +265,13 @@ pub const Iterator = struct { | |||
| 249 | return Word{ .len = word_len, .offset = word_start }; | 265 | return Word{ .len = word_len, .offset = word_start }; |
| 250 | } | 266 | } |
| 251 | 267 | ||
| 268 | pub fn format(iter: Iterator, _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void { | ||
| 269 | try writer.print( | ||
| 270 | "Iterator {{ .this = {any}, .that = {any} }}", | ||
| 271 | .{ iter.this, iter.that }, | ||
| 272 | ); | ||
| 273 | } | ||
| 274 | |||
| 252 | fn advance(iter: *Iterator) void { | 275 | fn advance(iter: *Iterator) void { |
| 253 | iter.this = iter.that; | 276 | iter.this = iter.that; |
| 254 | iter.that = iter.cp_iter.next(); | 277 | iter.that = iter.cp_iter.next(); |
| @@ -281,13 +304,27 @@ pub const ReverseIterator = struct { | |||
| 281 | 304 | ||
| 282 | /// Returns the previous word segment, without advancing. | 305 | /// Returns the previous word segment, without advancing. |
| 283 | pub fn peek(iter: *ReverseIterator) ?Word { | 306 | pub fn peek(iter: *ReverseIterator) ?Word { |
| 284 | const cache = .{ iter.before, iter.after, iter.cp_iter }; | 307 | const cache = .{ iter.before, iter.after, iter.cp_iter, iter.flags }; |
| 285 | defer { | 308 | defer { |
| 286 | iter.before, iter.after, iter.cp_iter = cache; | 309 | iter.before, iter.after, iter.cp_iter, iter.flags = cache; |
| 287 | } | 310 | } |
| 288 | return iter.prev(); | 311 | return iter.prev(); |
| 289 | } | 312 | } |
| 290 | 313 | ||
| 314 | /// Return a forward iterator from where this iterator paused. Usually, | ||
| 315 | /// calling `next()` will return the word just seen. | ||
| 316 | pub fn forwardIterator(iter: *ReverseIterator) Iterator { | ||
| 317 | var cp_it = iter.cp_iter.forwardIterator(); | ||
| 318 | if (iter.before) |_| | ||
| 319 | _ = cp_it.next(); | ||
| 320 | return .{ | ||
| 321 | .wb = iter.wb, | ||
| 322 | .this = cp_it.next(), | ||
| 323 | .that = iter.after, | ||
| 324 | .cp_iter = cp_it, | ||
| 325 | }; | ||
| 326 | } | ||
| 327 | |||
| 291 | /// Return the previous word, if any. | 328 | /// Return the previous word, if any. |
| 292 | pub fn prev(iter: *ReverseIterator) ?Word { | 329 | pub fn prev(iter: *ReverseIterator) ?Word { |
| 293 | iter.advance(); | 330 | iter.advance(); |
| @@ -425,6 +462,13 @@ pub const ReverseIterator = struct { | |||
| 425 | return Word{ .len = word_len, .offset = word_end - word_len }; | 462 | return Word{ .len = word_len, .offset = word_end - word_len }; |
| 426 | } | 463 | } |
| 427 | 464 | ||
| 465 | pub fn format(iter: ReverseIterator, _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void { | ||
| 466 | try writer.print( | ||
| 467 | "ReverseIterator {{ .before = {any}, .after = {any}, .flags = {d} }}", | ||
| 468 | .{ iter.before, iter.after, iter.flags }, | ||
| 469 | ); | ||
| 470 | } | ||
| 471 | |||
| 428 | fn peekPast(iter: *ReverseIterator) ?CodePoint { | 472 | fn peekPast(iter: *ReverseIterator) ?CodePoint { |
| 429 | const save_cp = iter.cp_iter; | 473 | const save_cp = iter.cp_iter; |
| 430 | defer iter.cp_iter = save_cp; | 474 | defer iter.cp_iter = save_cp; |
| @@ -573,6 +617,41 @@ test wordAtCursor { | |||
| 573 | try testing.expectEqualStrings("third", last.bytes(t_string)); | 617 | try testing.expectEqualStrings("third", last.bytes(t_string)); |
| 574 | } | 618 | } |
| 575 | 619 | ||
| 620 | const testr = "don't a:ka fin!"; | ||
| 621 | |||
| 622 | test "reversal" { | ||
| 623 | const wb = try WordBreak.init(testing.allocator); | ||
| 624 | defer wb.deinit(testing.allocator); | ||
| 625 | { | ||
| 626 | var fwd = wb.iterator(testr); | ||
| 627 | var this_word: ?Word = fwd.next(); | ||
| 628 | |||
| 629 | while (this_word) |this| : (this_word = fwd.next()) { | ||
| 630 | var back = fwd.reverseIterator(); | ||
| 631 | const that_word = back.prev(); | ||
| 632 | if (that_word) |that| { | ||
| 633 | try testing.expectEqualStrings(this.bytes(testr), that.bytes(testr)); | ||
| 634 | } else { | ||
| 635 | try testing.expect(false); | ||
| 636 | } | ||
| 637 | } | ||
| 638 | } | ||
| 639 | { | ||
| 640 | var back = wb.reverseIterator(testr); | ||
| 641 | var this_word: ?Word = back.prev(); | ||
| 642 | |||
| 643 | while (this_word) |this| : (this_word = back.prev()) { | ||
| 644 | var fwd = back.forwardIterator(); | ||
| 645 | const that_word = fwd.next(); | ||
| 646 | if (that_word) |that| { | ||
| 647 | try testing.expectEqualStrings(this.bytes(testr), that.bytes(testr)); | ||
| 648 | } else { | ||
| 649 | try testing.expect(false); | ||
| 650 | } | ||
| 651 | } | ||
| 652 | } | ||
| 653 | } | ||
| 654 | |||
| 576 | fn testAllocations(allocator: Allocator) !void { | 655 | fn testAllocations(allocator: Allocator) !void { |
| 577 | const wb = try WordBreak.init(allocator); | 656 | const wb = try WordBreak.init(allocator); |
| 578 | wb.deinit(allocator); | 657 | wb.deinit(allocator); |