Make offset size configurable

Hopefully I can talk users out of taking advantage of this configuration, but I'll have better luck with that if it's available.
author: Sam Atman 2025-05-23 18:46:30 -0400
committer: Sam Atman 2025-05-23 18:46:30 -0400
commit: c9a1b3392973ee30e6a9a532f1da8605619b5b06 (patch)
tree: 1198b2fcb544bcef9f634cf507d848d82548f00a /src/Words.zig
parent: Add iterateBefore and iterateAfter (diff)
download: zg-c9a1b3392973ee30e6a9a532f1da8605619b5b06.tar.gz
zg-c9a1b3392973ee30e6a9a532f1da8605619b5b06.tar.xz
zg-c9a1b3392973ee30e6a9a532f1da8605619b5b06.zip
1 file changed, 8 insertions, 6 deletions
diff --git a/src/Words.zig b/src/Words.zig
index 1d10b2a..1707881 100644
--- a/src/Words.zig
+++ b/src/Words.zig
@@ -53,8 +53,8 @@ pub fn deinit(words: *const Words, allocator: mem.Allocator) void {
 /// Represents a Unicode word span, as an offset into the source string
 /// and the length of the word.
 pub const Word = struct {
-    offset: u32,
+    offset: uoffset,
-    len: u32,
+    len: uoffset,
    /// Returns a slice of the word given the source string.
    pub fn bytes(word: Word, src: []const u8) []const u8 {
@@ -183,7 +183,7 @@ pub const Iterator = struct {
        if (iter.that == null) return Word{ .len = iter.this.?.len, .offset = iter.this.?.offset };
        const word_start = iter.this.?.offset;
-        var word_len: u32 = 0;
+        var word_len: uoffset = 0;
        // State variables.
        var last_p: WordBreakProperty = .none;
@@ -364,7 +364,7 @@ pub const ReverseIterator = struct {
        if (iter.before == null) return Word{ .len = iter.after.?.len, .offset = 0 };
        const word_end = iter.after.?.offset + iter.after.?.len;
-        var word_len: u32 = 0;
+        var word_len: uoffset = 0;
        // State variables.
        var last_p: WordBreakProperty = .none;
@@ -518,7 +518,7 @@ pub const ReverseIterator = struct {
 /// Initialize a ReverseIterator at the provided index. Used in `wordAtIndex`.
 fn reverseFromIndex(words: *const Words, string: []const u8, index: usize) ReverseIterator {
-    var idx: u32 = @intCast(index);
+    var idx: uoffset = @intCast(index);
    // Find the next lead byte:
    while (idx < string.len and 0x80 <= string[idx] and string[idx] <= 0xBf) : (idx += 1) {}
    if (idx == string.len) return words.reverseIterator(string);
@@ -537,7 +537,7 @@ fn reverseFromIndex(words: *const Words, string: []const u8, index: usize) Rever
 }
 fn forwardFromIndex(words: *const Words, string: []const u8, index: usize) Iterator {
-    var idx: u32 = @intCast(index);
+    var idx: uoffset = @intCast(index);
    if (idx == string.len) {
        return .{
            .cp_iter = .{ .bytes = string, .i = idx },
@@ -746,6 +746,8 @@ const Allocator = mem.Allocator;
 const assert = std.debug.assert;
 const testing = std.testing;
+const uoffset = code_point.uoffset;
 const code_point = @import("code_point");
 const CodepointIterator = code_point.Iterator;
 const ReverseCodepointIterator = code_point.ReverseIterator;
author	Sam Atman	2025-05-23 18:46:30 -0400
committer	Sam Atman	2025-05-23 18:46:30 -0400
commit	c9a1b3392973ee30e6a9a532f1da8605619b5b06 (patch)
tree	1198b2fcb544bcef9f634cf507d848d82548f00a /src/Words.zig
parent	Add iterateBefore and iterateAfter (diff)
download	zg-c9a1b3392973ee30e6a9a532f1da8605619b5b06.tar.gz zg-c9a1b3392973ee30e6a9a532f1da8605619b5b06.tar.xz zg-c9a1b3392973ee30e6a9a532f1da8605619b5b06.zip