summary | refs | log | tree | commit | diff
path: root/src/code_point.zig
diff options
context:
space:
mode:
author: Jose Colon Rodriguez, 2024-02-18 08:48:03 -0400
committer: Jose Colon Rodriguez, 2024-02-18 08:48:03 -0400
commit: 1404c85f513a88bbd399ab9f3453da71e7478727 (patch)
tree: 0080678ceac38f223910d60bf650ebaddf27b0f9 /src/code_point.zig
parent: Fixed isAsciiOnly and CodePointIterator ASCII bugs (diff)
download: zg-1404c85f513a88bbd399ab9f3453da71e7478727.tar.gz
zg-1404c85f513a88bbd399ab9f3453da71e7478727.tar.xz
zg-1404c85f513a88bbd399ab9f3453da71e7478727.zip
Code point and grapheme are now namespaces.
Diffstat (limited to '')
-rw-r--r-- src/code_point.zig (renamed from src/CodePoint.zig) 39
1 files changed, 20 insertions, 19 deletions
diff --git a/src/CodePoint.zig b/src/code_point.zig
index 62dd793..ac37562 100644
--- a/src/CodePoint.zig
+++ b/src/code_point.zig
@@ -1,28 +1,29 @@
1//! `CodePoint` represents a Unicode code point by its code, length, and offset in the source bytes.
2
3const std = @import("std"); 1const std = @import("std");
4 2
5code: u21, 3/// `CodePoint` represents a Unicode code point by its code,
6len: u3, 4/// length, and offset in the source bytes.
7offset: usize, 5pub const CodePoint = struct {
8 6 code: u21,
9const CodePoint = @This(); 7 len: u3,
8 offset: u32,
9};
10 10
11/// `CodePointIterator` iterates a string one `CodePoint` at-a-time. 11/// `Iterator` iterates a string one `CodePoint` at-a-time.
12pub const CodePointIterator = struct { 12pub const Iterator = struct {
13 bytes: []const u8, 13 bytes: []const u8,
14 i: usize = 0, 14 i: u32 = 0,
15 15
16 pub fn next(self: *CodePointIterator) ?CodePoint { 16 pub fn next(self: *Iterator) ?CodePoint {
17 if (self.i >= self.bytes.len) return null; 17 if (self.i >= self.bytes.len) return null;
18 18
19 if (self.bytes[self.i] < 128) { 19 if (self.bytes[self.i] < 128) {
20 // ASCII fast path 20 // ASCII fast path
21 self.i += 1; 21 defer self.i += 1;
22
22 return .{ 23 return .{
23 .code = self.bytes[self.i - 1], 24 .code = self.bytes[self.i],
24 .len = 1, 25 .len = 1,
25 .offset = self.i - 1, 26 .offset = self.i,
26 }; 27 };
27 } 28 }
28 29
@@ -33,12 +34,12 @@ pub const CodePointIterator = struct {
33 0b1110_0000...0b1110_1111 => 3, 34 0b1110_0000...0b1110_1111 => 3,
34 0b1111_0000...0b1111_0111 => 4, 35 0b1111_0000...0b1111_0111 => 4,
35 else => { 36 else => {
36 self.i += 1; 37 defer self.i += 1;
37 // Unicode replacement code point. 38 // Unicode replacement code point.
38 return .{ 39 return .{
39 .code = 0xfffd, 40 .code = 0xfffd,
40 .len = 1, 41 .len = 1,
41 .offset = self.i - 1, 42 .offset = self.i,
42 }; 43 };
43 }, 44 },
44 }, 45 },
@@ -66,15 +67,15 @@ pub const CodePointIterator = struct {
66 return cp; 67 return cp;
67 } 68 }
68 69
69 pub fn peek(self: *CodePointIterator) ?CodePoint { 70 pub fn peek(self: *Iterator) ?CodePoint {
70 const saved_i = self.i; 71 const saved_i = self.i;
71 defer self.i = saved_i; 72 defer self.i = saved_i;
72 return self.next(); 73 return self.next();
73 } 74 }
74}; 75};
75 76
76test "CodePointIterator peek" { 77test "peek" {
77 var iter = CodePointIterator{ .bytes = "Hi" }; 78 var iter = Iterator{ .bytes = "Hi" };
78 79
79 try std.testing.expectEqual(@as(u21, 'H'), iter.next().?.code); 80 try std.testing.expectEqual(@as(u21, 'H'), iter.next().?.code);
80 try std.testing.expectEqual(@as(u21, 'i'), iter.peek().?.code); 81 try std.testing.expectEqual(@as(u21, 'i'), iter.peek().?.code);