summary | refs | log | tree | commit | diff
path: root/src/Words.zig
diff options
context:
space:
mode:
author: Sam Atman, 2025-05-23 18:46:30 -0400
committer: Sam Atman, 2025-05-23 18:46:30 -0400
commit: c9a1b3392973ee30e6a9a532f1da8605619b5b06 (patch)
tree: 1198b2fcb544bcef9f634cf507d848d82548f00a /src/Words.zig
parent: Add iterateBefore and iterateAfter (diff)
download: zg-c9a1b3392973ee30e6a9a532f1da8605619b5b06.tar.gz
zg-c9a1b3392973ee30e6a9a532f1da8605619b5b06.tar.xz
zg-c9a1b3392973ee30e6a9a532f1da8605619b5b06.zip
Make offset size configurable
Hopefully I can talk users out of taking advantage of this configuration, but I'll have better luck with that if it's available.
Diffstat (limited to 'src/Words.zig')
-rw-r--r-- src/Words.zig 14
1 file changed, 8 insertions, 6 deletions
diff --git a/src/Words.zig b/src/Words.zig
index 1d10b2a..1707881 100644
--- a/src/Words.zig
+++ b/src/Words.zig
@@ -53,8 +53,8 @@ pub fn deinit(words: *const Words, allocator: mem.Allocator) void {
53/// Represents a Unicode word span, as an offset into the source string 53/// Represents a Unicode word span, as an offset into the source string
54/// and the length of the word. 54/// and the length of the word.
55pub const Word = struct { 55pub const Word = struct {
56 offset: u32, 56 offset: uoffset,
57 len: u32, 57 len: uoffset,
58 58
59 /// Returns a slice of the word given the source string. 59 /// Returns a slice of the word given the source string.
60 pub fn bytes(word: Word, src: []const u8) []const u8 { 60 pub fn bytes(word: Word, src: []const u8) []const u8 {
@@ -183,7 +183,7 @@ pub const Iterator = struct {
183 if (iter.that == null) return Word{ .len = iter.this.?.len, .offset = iter.this.?.offset }; 183 if (iter.that == null) return Word{ .len = iter.this.?.len, .offset = iter.this.?.offset };
184 184
185 const word_start = iter.this.?.offset; 185 const word_start = iter.this.?.offset;
186 var word_len: u32 = 0; 186 var word_len: uoffset = 0;
187 187
188 // State variables. 188 // State variables.
189 var last_p: WordBreakProperty = .none; 189 var last_p: WordBreakProperty = .none;
@@ -364,7 +364,7 @@ pub const ReverseIterator = struct {
364 if (iter.before == null) return Word{ .len = iter.after.?.len, .offset = 0 }; 364 if (iter.before == null) return Word{ .len = iter.after.?.len, .offset = 0 };
365 365
366 const word_end = iter.after.?.offset + iter.after.?.len; 366 const word_end = iter.after.?.offset + iter.after.?.len;
367 var word_len: u32 = 0; 367 var word_len: uoffset = 0;
368 368
369 // State variables. 369 // State variables.
370 var last_p: WordBreakProperty = .none; 370 var last_p: WordBreakProperty = .none;
@@ -518,7 +518,7 @@ pub const ReverseIterator = struct {
518 518
519/// Initialize a ReverseIterator at the provided index. Used in `wordAtIndex`. 519/// Initialize a ReverseIterator at the provided index. Used in `wordAtIndex`.
520fn reverseFromIndex(words: *const Words, string: []const u8, index: usize) ReverseIterator { 520fn reverseFromIndex(words: *const Words, string: []const u8, index: usize) ReverseIterator {
521 var idx: u32 = @intCast(index); 521 var idx: uoffset = @intCast(index);
522 // Find the next lead byte: 522 // Find the next lead byte:
523 while (idx < string.len and 0x80 <= string[idx] and string[idx] <= 0xBf) : (idx += 1) {} 523 while (idx < string.len and 0x80 <= string[idx] and string[idx] <= 0xBf) : (idx += 1) {}
524 if (idx == string.len) return words.reverseIterator(string); 524 if (idx == string.len) return words.reverseIterator(string);
@@ -537,7 +537,7 @@ fn reverseFromIndex(words: *const Words, string: []const u8, index: usize) Rever
537} 537}
538 538
539fn forwardFromIndex(words: *const Words, string: []const u8, index: usize) Iterator { 539fn forwardFromIndex(words: *const Words, string: []const u8, index: usize) Iterator {
540 var idx: u32 = @intCast(index); 540 var idx: uoffset = @intCast(index);
541 if (idx == string.len) { 541 if (idx == string.len) {
542 return .{ 542 return .{
543 .cp_iter = .{ .bytes = string, .i = idx }, 543 .cp_iter = .{ .bytes = string, .i = idx },
@@ -746,6 +746,8 @@ const Allocator = mem.Allocator;
746const assert = std.debug.assert; 746const assert = std.debug.assert;
747const testing = std.testing; 747const testing = std.testing;
748 748
749const uoffset = code_point.uoffset;
750
749const code_point = @import("code_point"); 751const code_point = @import("code_point");
750const CodepointIterator = code_point.Iterator; 752const CodepointIterator = code_point.Iterator;
751const ReverseCodepointIterator = code_point.ReverseIterator; 753const ReverseCodepointIterator = code_point.ReverseIterator;