diff options
| author | 2025-05-23 18:46:30 -0400 | |
|---|---|---|
| committer | 2025-05-23 18:46:30 -0400 | |
| commit | c9a1b3392973ee30e6a9a532f1da8605619b5b06 (patch) | |
| tree | 1198b2fcb544bcef9f634cf507d848d82548f00a /src/Graphemes.zig | |
| parent | Add iterateBefore and iterateAfter (diff) | |
| download | zg-c9a1b3392973ee30e6a9a532f1da8605619b5b06.tar.gz zg-c9a1b3392973ee30e6a9a532f1da8605619b5b06.tar.xz zg-c9a1b3392973ee30e6a9a532f1da8605619b5b06.zip | |
Make offset size configurable
Hopefully I can talk users out of taking advantage of this configuration,
but I'll have better luck with that if it's available.
Diffstat (limited to 'src/Graphemes.zig')
| -rw-r--r-- | src/Graphemes.zig | 20 |
1 files changed, 11 insertions, 9 deletions
diff --git a/src/Graphemes.zig b/src/Graphemes.zig index 0338c04..49fdbf3 100644 --- a/src/Graphemes.zig +++ b/src/Graphemes.zig | |||
| @@ -5,9 +5,11 @@ const Allocator = mem.Allocator; | |||
| 5 | const compress = std.compress; | 5 | const compress = std.compress; |
| 6 | const unicode = std.unicode; | 6 | const unicode = std.unicode; |
| 7 | 7 | ||
| 8 | const CodePoint = @import("code_point").CodePoint; | 8 | const code_point = @import("code_point"); |
| 9 | const CodePointIterator = @import("code_point").Iterator; | 9 | const CodePoint = code_point.CodePoint; |
| 10 | const CodePointReverseIterator = @import("code_point").ReverseIterator; | 10 | const CodePointIterator = code_point.Iterator; |
| 11 | const CodePointReverseIterator = code_point.ReverseIterator; | ||
| 12 | const uoffset = code_point.uoffset; | ||
| 11 | 13 | ||
| 12 | s1: []u16 = undefined, | 14 | s1: []u16 = undefined, |
| 13 | s2: []u16 = undefined, | 15 | s2: []u16 = undefined, |
| @@ -104,8 +106,8 @@ pub const Gbp = enum { | |||
| 104 | 106 | ||
| 105 | /// `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes. | 107 | /// `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes. |
| 106 | pub const Grapheme = struct { | 108 | pub const Grapheme = struct { |
| 107 | len: u32, | 109 | len: uoffset, |
| 108 | offset: u32, | 110 | offset: uoffset, |
| 109 | 111 | ||
| 110 | /// `bytes` returns the slice of bytes that correspond to | 112 | /// `bytes` returns the slice of bytes that correspond to |
| 111 | /// this grapheme cluster in `src`. | 113 | /// this grapheme cluster in `src`. |
| @@ -199,7 +201,7 @@ pub const ReverseIterator = struct { | |||
| 199 | /// Count of pending RI codepoints, it is an even number | 201 | /// Count of pending RI codepoints, it is an even number |
| 200 | ri_count: usize, | 202 | ri_count: usize, |
| 201 | /// End of (Extend* ZWJ) sequence pending from failed GB11: !Emoji Extend* ZWJ x Emoji | 203 | /// End of (Extend* ZWJ) sequence pending from failed GB11: !Emoji Extend* ZWJ x Emoji |
| 202 | extend_end: u32, | 204 | extend_end: uoffset, |
| 203 | }; | 205 | }; |
| 204 | 206 | ||
| 205 | const Self = @This(); | 207 | const Self = @This(); |
| @@ -219,7 +221,7 @@ pub const ReverseIterator = struct { | |||
| 219 | pub fn prev(self: *Self) ?Grapheme { | 221 | pub fn prev(self: *Self) ?Grapheme { |
| 220 | if (self.buf[1] == null) return null; | 222 | if (self.buf[1] == null) return null; |
| 221 | 223 | ||
| 222 | const grapheme_end: u32 = end: { | 224 | const grapheme_end: uoffset = end: { |
| 223 | const codepoint = self.buf[1].?; | 225 | const codepoint = self.buf[1].?; |
| 224 | 226 | ||
| 225 | switch (self.pending) { | 227 | switch (self.pending) { |
| @@ -270,7 +272,7 @@ pub const ReverseIterator = struct { | |||
| 270 | if (!state.hasIndic()) { | 272 | if (!state.hasIndic()) { |
| 271 | 273 | ||
| 272 | // BUF: [?Any, Extend | Linker] Consonant | 274 | // BUF: [?Any, Extend | Linker] Consonant |
| 273 | var indic_offset: u32 = self.buf[1].?.offset + self.buf[1].?.len; | 275 | var indic_offset: uoffset = self.buf[1].?.offset + self.buf[1].?.len; |
| 274 | 276 | ||
| 275 | indic: while (true) { | 277 | indic: while (true) { |
| 276 | if (self.buf[0] == null) { | 278 | if (self.buf[0] == null) { |
| @@ -321,7 +323,7 @@ pub const ReverseIterator = struct { | |||
| 321 | 323 | ||
| 322 | if (!state.hasXpic()) { | 324 | if (!state.hasXpic()) { |
| 323 | // BUF: [?Any, ZWJ] Emoji | 325 | // BUF: [?Any, ZWJ] Emoji |
| 324 | var emoji_offset: u32 = self.buf[1].?.offset + self.buf[1].?.len; | 326 | var emoji_offset: uoffset = self.buf[1].?.offset + self.buf[1].?.len; |
| 325 | 327 | ||
| 326 | // Look for previous Emoji | 328 | // Look for previous Emoji |
| 327 | emoji: while (true) { | 329 | emoji: while (true) { |