3 files changed, 105 insertions, 33 deletions
diff --git a/build.zig.zon b/build.zig.zon
index b69249f..3e1df95 100644
--- a/build.zig.zon
+++ b/build.zig.zon
@@ -1,6 +1,6 @@
 .{
    .name = .zg,
-    .version = "0.14.0",
+    .version = "0.14.1",
    .minimum_zig_version = "0.14.0",
    .fingerprint = 0x47df7778dc946aa0,
diff --git a/src/Words.zig b/src/Words.zig
index 565a2fb..1d10b2a 100644
--- a/src/Words.zig
+++ b/src/Words.zig
@@ -1,4 +1,7 @@
 //! Word Breaking Algorithm.
+//!
+//! https://www.unicode.org/reports/tr29/#Word_Boundaries
+//!
 const WordBreakProperty = enum(u5) {
    none,
@@ -42,9 +45,9 @@ pub fn setup(wb: *Words, allocator: Allocator) Allocator.Error!void {
    };
 }
-pub fn deinit(wordbreak: *const Words, allocator: mem.Allocator) void {
+pub fn deinit(words: *const Words, allocator: mem.Allocator) void {
-    allocator.free(wordbreak.s1);
+    allocator.free(words.s1);
-    allocator.free(wordbreak.s2);
+    allocator.free(words.s2);
 }
 /// Represents a Unicode word span, as an offset into the source string
@@ -54,51 +57,44 @@ pub const Word = struct {
    len: u32,
    /// Returns a slice of the word given the source string.
-    pub fn bytes(self: Word, src: []const u8) []const u8 {
+    pub fn bytes(word: Word, src: []const u8) []const u8 {
-        return src[self.offset..][0..self.len];
+        return src[word.offset..][0..word.len];
    }
 };
 /// Returns the word break property type for `cp`.
-pub fn breakProperty(wordbreak: *const Words, cp: u21) WordBreakProperty {
+pub fn breakProperty(words: *const Words, cp: u21) WordBreakProperty {
-    return @enumFromInt(wordbreak.s2[wordbreak.s1[cp >> 8] + (cp & 0xff)]);
+    return @enumFromInt(words.s2[words.s1[cp >> 8] + (cp & 0xff)]);
 }
 /// Convenience function for working with CodePoints
-fn breakProp(wb: *const Words, point: CodePoint) WordBreakProperty {
+fn breakProp(words: *const Words, point: CodePoint) WordBreakProperty {
-    return @enumFromInt(wb.s2[wb.s1[point.code >> 8] + (point.code & 0xff)]);
+    return @enumFromInt(words.s2[words.s1[point.code >> 8] + (point.code & 0xff)]);
 }
 /// Returns the Word at the given index.  Asserts that the index is less than
 /// `string.len`, and that the string is not empty. Always returns a word.
 /// The index does not have to be the start of a codepoint in the word.
-pub fn wordAtIndex(wordbreak: *const Words, string: []const u8, index: usize) Word {
+pub fn wordAtIndex(words: *const Words, string: []const u8, index: usize) Word {
    assert(index < string.len and string.len > 0);
-    var iter_back: ReverseIterator = initAtIndex(wordbreak, string, index);
+    var iter_back: ReverseIterator = reverseFromIndex(words, string, index);
    const first_back = iter_back.prev();
    if (first_back) |back| {
        if (back.offset == 0) {
-            var iter_fwd = wordbreak.iterator(string);
+            var iter_fwd = words.iterator(string);
            while (iter_fwd.next()) |word| {
                if (word.offset <= index and index < word.offset + word.len)
                    return word;
            }
        }
    } else {
-        var iter_fwd = wordbreak.iterator(string);
+        var iter_fwd = words.iterator(string);
        while (iter_fwd.next()) |word| {
            if (word.offset <= index and index < word.offset + word.len)
                return word;
        }
    }
-    const second_back = iter_back.prev();
+    _ = iter_back.prev();
-    if (second_back) |back| if (back.offset == 0) {
-        var iter_fwd = wordbreak.iterator(string);
-        while (iter_fwd.next()) |word| {
-            if (word.offset <= index and index < word.offset + word.len)
-                return word;
-        }
-    };
    // There's sometimes flags:
    if (iter_back.flags > 0) {
        while (iter_back.flags > 0) {
@@ -118,13 +114,23 @@ pub fn wordAtIndex(wordbreak: *const Words, string: []const u8, index: usize) Wo
 }
 /// Returns an iterator over words in `slice`.
-pub fn iterator(wordbreak: *const Words, slice: []const u8) Iterator {
+pub fn iterator(words: *const Words, slice: []const u8) Iterator {
-    return Iterator.init(wordbreak, slice);
+    return Iterator.init(words, slice);
 }
 /// Returns a reverse iterator over the words in `slice`.
-pub fn reverseIterator(wordbreak: *const Words, slice: []const u8) ReverseIterator {
+pub fn reverseIterator(words: *const Words, slice: []const u8) ReverseIterator {
-    return ReverseIterator.init(wordbreak, slice);
+    return ReverseIterator.init(words, slice);
+}
+/// Returns a forward iterator over `slice` that starts at the byte just
+/// past `word`, i.e. at `word.offset + word.len`.
+pub fn iterateAfter(words: *const Words, slice: []const u8, word: Word) Iterator {
+    const resume_at = word.offset + word.len;
+    return forwardFromIndex(words, slice, resume_at);
+}
+/// Returns a reverse iterator over `slice` that starts at `word.offset`,
+/// the first byte of `word`.
+pub fn iterateBefore(words: *const Words, slice: []const u8, word: Word) ReverseIterator {
+    const start = word.offset;
+    return reverseFromIndex(words, slice, start);
 }
 /// An iterator, forward, over all words in a provided string.
@@ -135,8 +141,8 @@ pub const Iterator = struct {
    wb: *const Words,
    /// Assumes `str` is valid UTF-8.
-    pub fn init(wb: *const Words, str: []const u8) Iterator {
+    pub fn init(words: *const Words, str: []const u8) Iterator {
-        var wb_iter: Iterator = .{ .cp_iter = .init(str), .wb = wb };
+        var wb_iter: Iterator = .{ .cp_iter = .init(str), .wb = words };
        wb_iter.advance();
        return wb_iter;
    }
@@ -318,8 +324,8 @@ pub const ReverseIterator = struct {
    flags: usize = 0,
    /// Assumes `str` is valid UTF-8.
-    pub fn init(wb: *const Words, str: []const u8) ReverseIterator {
+    pub fn init(words: *const Words, str: []const u8) ReverseIterator {
-        var wb_iter: ReverseIterator = .{ .cp_iter = .init(str), .wb = wb };
+        var wb_iter: ReverseIterator = .{ .cp_iter = .init(str), .wb = words };
        wb_iter.advance();
        return wb_iter;
    }
@@ -511,13 +517,13 @@ pub const ReverseIterator = struct {
 //| Implementation Details
 /// Initialize a ReverseIterator at the provided index. Used in `wordAtIndex`.
-fn initAtIndex(wb: *const Words, string: []const u8, index: usize) ReverseIterator {
+fn reverseFromIndex(words: *const Words, string: []const u8, index: usize) ReverseIterator {
    var idx: u32 = @intCast(index);
    // Find the next lead byte:
    while (idx < string.len and 0x80 <= string[idx] and string[idx] <= 0xBf) : (idx += 1) {}
-    if (idx == string.len) return wb.reverseIterator(string);
+    if (idx == string.len) return words.reverseIterator(string);
    var iter: ReverseIterator = undefined;
-    iter.wb = wb;
+    iter.wb = words;
    iter.flags = 0;
    // We need to populate the CodePoints, and the codepoint iterator.
    // Consider "abc| def" with the cursor as |.
@@ -530,6 +536,34 @@ fn initAtIndex(wb: *const Words, string: []const u8, index: usize) ReverseIterat
    return iter;
 }
+/// Initialize an Iterator at the provided byte index. Used in `iterateAfter`.
+/// Asserts that `index <= string.len`. An index equal to `string.len` yields
+/// an iterator with no codepoints left. Bytes in the range 0x80...0xBF are
+/// treated as continuation bytes, so an index inside a codepoint is moved
+/// back to that codepoint's lead byte.
+fn forwardFromIndex(words: *const Words, string: []const u8, index: usize) Iterator {
+    assert(index <= string.len);
+    var idx: u32 = @intCast(index);
+    if (idx == string.len) {
+        // Nothing follows the index: both lookahead slots stay empty.
+        return .{
+            .cp_iter = .{ .bytes = string, .i = idx },
+            .this = null,
+            .that = null,
+            .wb = words,
+        };
+    }
+    // Find the lead byte of the codepoint containing `index`:
+    while (idx > 0 and 0x80 <= string[idx] and string[idx] <= 0xBf) : (idx -= 1) {}
+    if (idx == 0) return words.iterator(string);
+    // We need to populate the CodePoints, and the codepoint iterator.
+    // Consider "abc |def" with the cursor as |.
+    // We need `this` to be ` ` and `that` to be 'd',
+    // and `cp_iter.next()` to be `e`.
+    idx -= 1;
+    // Step back to the lead byte of the preceding codepoint:
+    while (idx > 0 and 0x80 <= string[idx] and string[idx] <= 0xBf) : (idx -= 1) {}
+    // "abc| def"
+    var cp_iter: CodepointIterator = .{ .bytes = string, .i = idx };
+    const this_cp = cp_iter.next();
+    const that_cp = cp_iter.next();
+    // Build the result in a single literal (as the early return above does)
+    // so that no field is left `undefined`.
+    return .{
+        .cp_iter = cp_iter,
+        .this = this_cp,
+        .that = that_cp,
+        .wb = words,
+    };
+}
 fn sneaky(iter: *const ReverseIterator) SneakIterator {
    return .{ .cp_iter = iter.cp_iter, .wb = iter.wb };
 }
diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig
index 18f1814..195fdcb 100644
--- a/src/unicode_tests.zig
+++ b/src/unicode_tests.zig
@@ -287,6 +287,25 @@ test "Segmentation Word Iterator" {
                } else {
                    try testing.expect(false);
                }
+                var peek_iter = wb.iterateAfter(this_str, got_word);
+                const peek_1 = peek_iter.next();
+                if (peek_1) |p1| {
+                    const peek_2 = iter.peek();
+                    if (peek_2) |p2| {
+                        std.testing.expectEqualSlices(
+                            u8,
+                            p1.bytes(this_str),
+                            p2.bytes(this_str),
+                        ) catch |err| {
+                            debug.print("Bad peek on line {d} #{d} offset {d}\n", .{ line_iter.line, idx + 1, idx });
+                            return err;
+                        };
+                    } else {
+                        try testing.expect(false);
+                    }
+                } else {
+                    try testing.expectEqual(null, iter.peek());
+                }
                for (got_word.offset..got_word.offset + got_word.len) |i| {
                    const this_word = wb.wordAtIndex(this_str, i);
                    std.testing.expectEqualSlices(
@@ -337,6 +356,25 @@ test "Segmentation Word Iterator" {
                } else {
                    try testing.expect(false);
                }
+                var peek_iter = wb.iterateBefore(this_str, got_word);
+                const peek_1 = peek_iter.prev();
+                if (peek_1) |p1| {
+                    const peek_2 = r_iter.peek();
+                    if (peek_2) |p2| {
+                        std.testing.expectEqualSlices(
+                            u8,
+                            p1.bytes(this_str),
+                            p2.bytes(this_str),
+                        ) catch |err| {
+                            debug.print("Bad peek on line {d} #{d} offset {d}\n", .{ line_iter.line, idx + 1, idx });
+                            return err;
+                        };
+                    } else {
+                        try testing.expect(false);
+                    }
+                } else {
+                    try testing.expectEqual(null, r_iter.peek());
+                }
                for (got_word.offset..got_word.offset + got_word.len) |i| {
                    const this_word = wb.wordAtIndex(this_str, i);
                    std.testing.expectEqualSlices(