diff options
| author | 2025-05-23 18:46:30 -0400 | |
|---|---|---|
| committer | 2025-05-23 18:46:30 -0400 | |
| commit | c9a1b3392973ee30e6a9a532f1da8605619b5b06 (patch) | |
| tree | 1198b2fcb544bcef9f634cf507d848d82548f00a /src/Words.zig | |
| parent | Add iterateBefore and iterateAfter (diff) | |
| download | zg-c9a1b3392973ee30e6a9a532f1da8605619b5b06.tar.gz zg-c9a1b3392973ee30e6a9a532f1da8605619b5b06.tar.xz zg-c9a1b3392973ee30e6a9a532f1da8605619b5b06.zip | |
Make offset size configurable
Hopefully I can talk users out of taking advantage of this configuration,
but I'll have better luck with that if the option is actually available.
Diffstat (limited to 'src/Words.zig')
| -rw-r--r-- | src/Words.zig | 14 |
1 file changed, 8 insertions, 6 deletions
diff --git a/src/Words.zig b/src/Words.zig index 1d10b2a..1707881 100644 --- a/src/Words.zig +++ b/src/Words.zig | |||
| @@ -53,8 +53,8 @@ pub fn deinit(words: *const Words, allocator: mem.Allocator) void { | |||
| 53 | /// Represents a Unicode word span, as an offset into the source string | 53 | /// Represents a Unicode word span, as an offset into the source string |
| 54 | /// and the length of the word. | 54 | /// and the length of the word. |
| 55 | pub const Word = struct { | 55 | pub const Word = struct { |
| 56 | offset: u32, | 56 | offset: uoffset, |
| 57 | len: u32, | 57 | len: uoffset, |
| 58 | 58 | ||
| 59 | /// Returns a slice of the word given the source string. | 59 | /// Returns a slice of the word given the source string. |
| 60 | pub fn bytes(word: Word, src: []const u8) []const u8 { | 60 | pub fn bytes(word: Word, src: []const u8) []const u8 { |
| @@ -183,7 +183,7 @@ pub const Iterator = struct { | |||
| 183 | if (iter.that == null) return Word{ .len = iter.this.?.len, .offset = iter.this.?.offset }; | 183 | if (iter.that == null) return Word{ .len = iter.this.?.len, .offset = iter.this.?.offset }; |
| 184 | 184 | ||
| 185 | const word_start = iter.this.?.offset; | 185 | const word_start = iter.this.?.offset; |
| 186 | var word_len: u32 = 0; | 186 | var word_len: uoffset = 0; |
| 187 | 187 | ||
| 188 | // State variables. | 188 | // State variables. |
| 189 | var last_p: WordBreakProperty = .none; | 189 | var last_p: WordBreakProperty = .none; |
| @@ -364,7 +364,7 @@ pub const ReverseIterator = struct { | |||
| 364 | if (iter.before == null) return Word{ .len = iter.after.?.len, .offset = 0 }; | 364 | if (iter.before == null) return Word{ .len = iter.after.?.len, .offset = 0 }; |
| 365 | 365 | ||
| 366 | const word_end = iter.after.?.offset + iter.after.?.len; | 366 | const word_end = iter.after.?.offset + iter.after.?.len; |
| 367 | var word_len: u32 = 0; | 367 | var word_len: uoffset = 0; |
| 368 | 368 | ||
| 369 | // State variables. | 369 | // State variables. |
| 370 | var last_p: WordBreakProperty = .none; | 370 | var last_p: WordBreakProperty = .none; |
| @@ -518,7 +518,7 @@ pub const ReverseIterator = struct { | |||
| 518 | 518 | ||
| 519 | /// Initialize a ReverseIterator at the provided index. Used in `wordAtIndex`. | 519 | /// Initialize a ReverseIterator at the provided index. Used in `wordAtIndex`. |
| 520 | fn reverseFromIndex(words: *const Words, string: []const u8, index: usize) ReverseIterator { | 520 | fn reverseFromIndex(words: *const Words, string: []const u8, index: usize) ReverseIterator { |
| 521 | var idx: u32 = @intCast(index); | 521 | var idx: uoffset = @intCast(index); |
| 522 | // Find the next lead byte: | 522 | // Find the next lead byte: |
| 523 | while (idx < string.len and 0x80 <= string[idx] and string[idx] <= 0xBf) : (idx += 1) {} | 523 | while (idx < string.len and 0x80 <= string[idx] and string[idx] <= 0xBf) : (idx += 1) {} |
| 524 | if (idx == string.len) return words.reverseIterator(string); | 524 | if (idx == string.len) return words.reverseIterator(string); |
| @@ -537,7 +537,7 @@ fn reverseFromIndex(words: *const Words, string: []const u8, index: usize) Rever | |||
| 537 | } | 537 | } |
| 538 | 538 | ||
| 539 | fn forwardFromIndex(words: *const Words, string: []const u8, index: usize) Iterator { | 539 | fn forwardFromIndex(words: *const Words, string: []const u8, index: usize) Iterator { |
| 540 | var idx: u32 = @intCast(index); | 540 | var idx: uoffset = @intCast(index); |
| 541 | if (idx == string.len) { | 541 | if (idx == string.len) { |
| 542 | return .{ | 542 | return .{ |
| 543 | .cp_iter = .{ .bytes = string, .i = idx }, | 543 | .cp_iter = .{ .bytes = string, .i = idx }, |
| @@ -746,6 +746,8 @@ const Allocator = mem.Allocator; | |||
| 746 | const assert = std.debug.assert; | 746 | const assert = std.debug.assert; |
| 747 | const testing = std.testing; | 747 | const testing = std.testing; |
| 748 | 748 | ||
| 749 | const uoffset = code_point.uoffset; | ||
| 750 | |||
| 749 | const code_point = @import("code_point"); | 751 | const code_point = @import("code_point"); |
| 750 | const CodepointIterator = code_point.Iterator; | 752 | const CodepointIterator = code_point.Iterator; |
| 751 | const ReverseCodepointIterator = code_point.ReverseIterator; | 753 | const ReverseCodepointIterator = code_point.ReverseIterator; |