diff options
| author | 2025-05-23 18:46:30 -0400 | |
|---|---|---|
| committer | 2025-05-23 18:46:30 -0400 | |
| commit | c9a1b3392973ee30e6a9a532f1da8605619b5b06 (patch) | |
| tree | 1198b2fcb544bcef9f634cf507d848d82548f00a /src/code_point.zig | |
| parent | Add iterateBefore and iterateAfter (diff) | |
| download | zg-c9a1b3392973ee30e6a9a532f1da8605619b5b06.tar.gz zg-c9a1b3392973ee30e6a9a532f1da8605619b5b06.tar.xz zg-c9a1b3392973ee30e6a9a532f1da8605619b5b06.zip | |
Make offset size configurable
Hopefully I can talk users out of taking advantage of this configuration,
but I'll have better luck with that if it's available.
Diffstat (limited to 'src/code_point.zig')
| -rw-r--r-- | src/code_point.zig | 16 |
1 files changed, 9 insertions, 7 deletions
diff --git a/src/code_point.zig b/src/code_point.zig index 9a84080..8bd3d5b 100644 --- a/src/code_point.zig +++ b/src/code_point.zig | |||
| @@ -4,12 +4,14 @@ | |||
| 4 | //! Represents invalid data according to the Replacement of Maximal | 4 | //! Represents invalid data according to the Replacement of Maximal |
| 5 | //! Subparts algorithm. | 5 | //! Subparts algorithm. |
| 6 | 6 | ||
| 7 | pub const uoffset = if (@import("config").fat_offset) u64 else u32; | ||
| 8 | |||
| 7 | /// `CodePoint` represents a Unicode code point by its code, | 9 | /// `CodePoint` represents a Unicode code point by its code, |
| 8 | /// length, and offset in the source bytes. | 10 | /// length, and offset in the source bytes. |
| 9 | pub const CodePoint = struct { | 11 | pub const CodePoint = struct { |
| 10 | code: u21, | 12 | code: u21, |
| 11 | len: u3, | 13 | len: u3, |
| 12 | offset: u32, | 14 | offset: uoffset, |
| 13 | 15 | ||
| 14 | /// Return the slice of this codepoint, given the original string. | 16 | /// Return the slice of this codepoint, given the original string. |
| 15 | pub inline fn bytes(cp: CodePoint, str: []const u8) []const u8 { | 17 | pub inline fn bytes(cp: CodePoint, str: []const u8) []const u8 { |
| @@ -27,8 +29,8 @@ pub const CodePoint = struct { | |||
| 27 | 29 | ||
| 28 | /// This function is deprecated and will be removed in a later release. | 30 | /// This function is deprecated and will be removed in a later release. |
| 29 | /// Use `decodeAtIndex` or `decodeAtCursor`. | 31 | /// Use `decodeAtIndex` or `decodeAtCursor`. |
| 30 | pub fn decode(bytes: []const u8, offset: u32) ?CodePoint { | 32 | pub fn decode(bytes: []const u8, offset: uoffset) ?CodePoint { |
| 31 | var off: u32 = 0; | 33 | var off: uoffset = 0; |
| 32 | var maybe_code = decodeAtCursor(bytes, &off); | 34 | var maybe_code = decodeAtCursor(bytes, &off); |
| 33 | if (maybe_code) |*code| { | 35 | if (maybe_code) |*code| { |
| 34 | code.offset = offset; | 36 | code.offset = offset; |
| @@ -38,14 +40,14 @@ pub fn decode(bytes: []const u8, offset: u32) ?CodePoint { | |||
| 38 | } | 40 | } |
| 39 | 41 | ||
| 40 | /// Decode the CodePoint, if any, at `bytes[idx]`. | 42 | /// Decode the CodePoint, if any, at `bytes[idx]`. |
| 41 | pub fn decodeAtIndex(bytes: []const u8, idx: u32) ?CodePoint { | 43 | pub fn decodeAtIndex(bytes: []const u8, idx: uoffset) ?CodePoint { |
| 42 | var off = idx; | 44 | var off = idx; |
| 43 | return decodeAtCursor(bytes, &off); | 45 | return decodeAtCursor(bytes, &off); |
| 44 | } | 46 | } |
| 45 | 47 | ||
| 46 | /// Decode the CodePoint, if any, at `bytes[cursor.*]`. After, the | 48 | /// Decode the CodePoint, if any, at `bytes[cursor.*]`. After, the |
| 47 | /// cursor will point at the next potential codepoint index. | 49 | /// cursor will point at the next potential codepoint index. |
| 48 | pub fn decodeAtCursor(bytes: []const u8, cursor: *u32) ?CodePoint { | 50 | pub fn decodeAtCursor(bytes: []const u8, cursor: *uoffset) ?CodePoint { |
| 49 | // EOS | 51 | // EOS |
| 50 | if (cursor.* >= bytes.len) return null; | 52 | if (cursor.* >= bytes.len) return null; |
| 51 | 53 | ||
| @@ -161,7 +163,7 @@ pub fn decodeAtCursor(bytes: []const u8, cursor: *u32) ?CodePoint { | |||
| 161 | /// `Iterator` iterates a string one `CodePoint` at-a-time. | 163 | /// `Iterator` iterates a string one `CodePoint` at-a-time. |
| 162 | pub const Iterator = struct { | 164 | pub const Iterator = struct { |
| 163 | bytes: []const u8, | 165 | bytes: []const u8, |
| 164 | i: u32 = 0, | 166 | i: uoffset = 0, |
| 165 | 167 | ||
| 166 | pub fn init(bytes: []const u8) Iterator { | 168 | pub fn init(bytes: []const u8) Iterator { |
| 167 | return .{ .bytes = bytes, .i = 0 }; | 169 | return .{ .bytes = bytes, .i = 0 }; |
| @@ -257,7 +259,7 @@ const class_mask: [12]u8 = .{ | |||
| 257 | 259 | ||
| 258 | pub const ReverseIterator = struct { | 260 | pub const ReverseIterator = struct { |
| 259 | bytes: []const u8, | 261 | bytes: []const u8, |
| 260 | i: ?u32, | 262 | i: ?uoffset, |
| 261 | 263 | ||
| 262 | pub fn init(str: []const u8) ReverseIterator { | 264 | pub fn init(str: []const u8) ReverseIterator { |
| 263 | var r_iter: ReverseIterator = undefined; | 265 | var r_iter: ReverseIterator = undefined; |