summary | refs | log | tree | commit | diff
path: root/src/code_point.zig
diff options
context:
space:
mode:
author: Sam Atman, 2025-05-23 18:46:30 -0400
committer: Sam Atman, 2025-05-23 18:46:30 -0400
commit: c9a1b3392973ee30e6a9a532f1da8605619b5b06 (patch)
tree: 1198b2fcb544bcef9f634cf507d848d82548f00a /src/code_point.zig
parent: Add iterateBefore and iterateAfter (diff)
download: zg-c9a1b3392973ee30e6a9a532f1da8605619b5b06.tar.gz
zg-c9a1b3392973ee30e6a9a532f1da8605619b5b06.tar.xz
zg-c9a1b3392973ee30e6a9a532f1da8605619b5b06.zip
Make offset size configurable
Hopefully I can talk users out of taking advantage of this configuration, but I'll have better luck with that if it's available.
Diffstat (limited to 'src/code_point.zig')
-rw-r--r-- src/code_point.zig 16
1 files changed, 9 insertions, 7 deletions
diff --git a/src/code_point.zig b/src/code_point.zig
index 9a84080..8bd3d5b 100644
--- a/src/code_point.zig
+++ b/src/code_point.zig
@@ -4,12 +4,14 @@
4//! Represents invalid data according to the Replacement of Maximal 4//! Represents invalid data according to the Replacement of Maximal
5//! Subparts algorithm. 5//! Subparts algorithm.
6 6
7pub const uoffset = if (@import("config").fat_offset) u64 else u32;
8
7/// `CodePoint` represents a Unicode code point by its code, 9/// `CodePoint` represents a Unicode code point by its code,
8/// length, and offset in the source bytes. 10/// length, and offset in the source bytes.
9pub const CodePoint = struct { 11pub const CodePoint = struct {
10 code: u21, 12 code: u21,
11 len: u3, 13 len: u3,
12 offset: u32, 14 offset: uoffset,
13 15
14 /// Return the slice of this codepoint, given the original string. 16 /// Return the slice of this codepoint, given the original string.
15 pub inline fn bytes(cp: CodePoint, str: []const u8) []const u8 { 17 pub inline fn bytes(cp: CodePoint, str: []const u8) []const u8 {
@@ -27,8 +29,8 @@ pub const CodePoint = struct {
27 29
28/// This function is deprecated and will be removed in a later release. 30/// This function is deprecated and will be removed in a later release.
29/// Use `decodeAtIndex` or `decodeAtCursor`. 31/// Use `decodeAtIndex` or `decodeAtCursor`.
30pub fn decode(bytes: []const u8, offset: u32) ?CodePoint { 32pub fn decode(bytes: []const u8, offset: uoffset) ?CodePoint {
31 var off: u32 = 0; 33 var off: uoffset = 0;
32 var maybe_code = decodeAtCursor(bytes, &off); 34 var maybe_code = decodeAtCursor(bytes, &off);
33 if (maybe_code) |*code| { 35 if (maybe_code) |*code| {
34 code.offset = offset; 36 code.offset = offset;
@@ -38,14 +40,14 @@ pub fn decode(bytes: []const u8, offset: u32) ?CodePoint {
38} 40}
39 41
40/// Decode the CodePoint, if any, at `bytes[idx]`. 42/// Decode the CodePoint, if any, at `bytes[idx]`.
41pub fn decodeAtIndex(bytes: []const u8, idx: u32) ?CodePoint { 43pub fn decodeAtIndex(bytes: []const u8, idx: uoffset) ?CodePoint {
42 var off = idx; 44 var off = idx;
43 return decodeAtCursor(bytes, &off); 45 return decodeAtCursor(bytes, &off);
44} 46}
45 47
46/// Decode the CodePoint, if any, at `bytes[cursor.*]`. After, the 48/// Decode the CodePoint, if any, at `bytes[cursor.*]`. After, the
47/// cursor will point at the next potential codepoint index. 49/// cursor will point at the next potential codepoint index.
48pub fn decodeAtCursor(bytes: []const u8, cursor: *u32) ?CodePoint { 50pub fn decodeAtCursor(bytes: []const u8, cursor: *uoffset) ?CodePoint {
49 // EOS 51 // EOS
50 if (cursor.* >= bytes.len) return null; 52 if (cursor.* >= bytes.len) return null;
51 53
@@ -161,7 +163,7 @@ pub fn decodeAtCursor(bytes: []const u8, cursor: *u32) ?CodePoint {
161/// `Iterator` iterates a string one `CodePoint` at-a-time. 163/// `Iterator` iterates a string one `CodePoint` at-a-time.
162pub const Iterator = struct { 164pub const Iterator = struct {
163 bytes: []const u8, 165 bytes: []const u8,
164 i: u32 = 0, 166 i: uoffset = 0,
165 167
166 pub fn init(bytes: []const u8) Iterator { 168 pub fn init(bytes: []const u8) Iterator {
167 return .{ .bytes = bytes, .i = 0 }; 169 return .{ .bytes = bytes, .i = 0 };
@@ -257,7 +259,7 @@ const class_mask: [12]u8 = .{
257 259
258pub const ReverseIterator = struct { 260pub const ReverseIterator = struct {
259 bytes: []const u8, 261 bytes: []const u8,
260 i: ?u32, 262 i: ?uoffset,
261 263
262 pub fn init(str: []const u8) ReverseIterator { 264 pub fn init(str: []const u8) ReverseIterator {
263 var r_iter: ReverseIterator = undefined; 265 var r_iter: ReverseIterator = undefined;