summaryrefslogtreecommitdiff
path: root/src/Graphemes.zig
diff options
context:
space:
mode:
author Sam Atman 2025-05-23 18:46:30 -0400
committer Sam Atman 2025-05-23 18:46:30 -0400
commit c9a1b3392973ee30e6a9a532f1da8605619b5b06 (patch)
tree 1198b2fcb544bcef9f634cf507d848d82548f00a /src/Graphemes.zig
parent Add iterateBefore and iterateAfter (diff)
downloadzg-c9a1b3392973ee30e6a9a532f1da8605619b5b06.tar.gz
zg-c9a1b3392973ee30e6a9a532f1da8605619b5b06.tar.xz
zg-c9a1b3392973ee30e6a9a532f1da8605619b5b06.zip
Make offset size configurable
Hopefully I can talk users out of taking advantage of this configuration, but I'll have better luck with that if it's available.
Diffstat (limited to 'src/Graphemes.zig')
-rw-r--r-- src/Graphemes.zig 20
1 files changed, 11 insertions, 9 deletions
diff --git a/src/Graphemes.zig b/src/Graphemes.zig
index 0338c04..49fdbf3 100644
--- a/src/Graphemes.zig
+++ b/src/Graphemes.zig
@@ -5,9 +5,11 @@ const Allocator = mem.Allocator;
5const compress = std.compress; 5const compress = std.compress;
6const unicode = std.unicode; 6const unicode = std.unicode;
7 7
8const CodePoint = @import("code_point").CodePoint; 8const code_point = @import("code_point");
9const CodePointIterator = @import("code_point").Iterator; 9const CodePoint = code_point.CodePoint;
10const CodePointReverseIterator = @import("code_point").ReverseIterator; 10const CodePointIterator = code_point.Iterator;
11const CodePointReverseIterator = code_point.ReverseIterator;
12const uoffset = code_point.uoffset;
11 13
12s1: []u16 = undefined, 14s1: []u16 = undefined,
13s2: []u16 = undefined, 15s2: []u16 = undefined,
@@ -104,8 +106,8 @@ pub const Gbp = enum {
104 106
105/// `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes. 107/// `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes.
106pub const Grapheme = struct { 108pub const Grapheme = struct {
107 len: u32, 109 len: uoffset,
108 offset: u32, 110 offset: uoffset,
109 111
110 /// `bytes` returns the slice of bytes that correspond to 112 /// `bytes` returns the slice of bytes that correspond to
111 /// this grapheme cluster in `src`. 113 /// this grapheme cluster in `src`.
@@ -199,7 +201,7 @@ pub const ReverseIterator = struct {
199 /// Count of pending RI codepoints, it is an even number 201 /// Count of pending RI codepoints, it is an even number
200 ri_count: usize, 202 ri_count: usize,
201 /// End of (Extend* ZWJ) sequence pending from failed GB11: !Emoji Extend* ZWJ x Emoji 203 /// End of (Extend* ZWJ) sequence pending from failed GB11: !Emoji Extend* ZWJ x Emoji
202 extend_end: u32, 204 extend_end: uoffset,
203 }; 205 };
204 206
205 const Self = @This(); 207 const Self = @This();
@@ -219,7 +221,7 @@ pub const ReverseIterator = struct {
219 pub fn prev(self: *Self) ?Grapheme { 221 pub fn prev(self: *Self) ?Grapheme {
220 if (self.buf[1] == null) return null; 222 if (self.buf[1] == null) return null;
221 223
222 const grapheme_end: u32 = end: { 224 const grapheme_end: uoffset = end: {
223 const codepoint = self.buf[1].?; 225 const codepoint = self.buf[1].?;
224 226
225 switch (self.pending) { 227 switch (self.pending) {
@@ -270,7 +272,7 @@ pub const ReverseIterator = struct {
270 if (!state.hasIndic()) { 272 if (!state.hasIndic()) {
271 273
272 // BUF: [?Any, Extend | Linker] Consonant 274 // BUF: [?Any, Extend | Linker] Consonant
273 var indic_offset: u32 = self.buf[1].?.offset + self.buf[1].?.len; 275 var indic_offset: uoffset = self.buf[1].?.offset + self.buf[1].?.len;
274 276
275 indic: while (true) { 277 indic: while (true) {
276 if (self.buf[0] == null) { 278 if (self.buf[0] == null) {
@@ -321,7 +323,7 @@ pub const ReverseIterator = struct {
321 323
322 if (!state.hasXpic()) { 324 if (!state.hasXpic()) {
323 // BUF: [?Any, ZWJ] Emoji 325 // BUF: [?Any, ZWJ] Emoji
324 var emoji_offset: u32 = self.buf[1].?.offset + self.buf[1].?.len; 326 var emoji_offset: uoffset = self.buf[1].?.offset + self.buf[1].?.len;
325 327
326 // Look for previous Emoji 328 // Look for previous Emoji
327 emoji: while (true) { 329 emoji: while (true) {