diff options
| author | 2024-02-18 08:48:03 -0400 | |
|---|---|---|
| committer | 2024-02-18 08:48:03 -0400 | |
| commit | 1404c85f513a88bbd399ab9f3453da71e7478727 (patch) | |
| tree | 0080678ceac38f223910d60bf650ebaddf27b0f9 /src/code_point.zig | |
| parent | Fixed isAsciiOnly and CodePointIterator ASCII bugs (diff) | |
| download | zg-1404c85f513a88bbd399ab9f3453da71e7478727.tar.gz zg-1404c85f513a88bbd399ab9f3453da71e7478727.tar.xz zg-1404c85f513a88bbd399ab9f3453da71e7478727.zip | |
Code point and grapheme are now namespaces.
Diffstat (limited to '')
| -rw-r--r-- | src/code_point.zig (renamed from src/CodePoint.zig) | 39 |
1 file changed, 20 insertions, 19 deletions
diff --git a/src/CodePoint.zig b/src/code_point.zig index 62dd793..ac37562 100644 --- a/src/CodePoint.zig +++ b/src/code_point.zig | |||
| @@ -1,28 +1,29 @@ | |||
| 1 | //! `CodePoint` represents a Unicode code point by its code, length, and offset in the source bytes. | ||
| 2 | |||
| 3 | const std = @import("std"); | 1 | const std = @import("std"); |
| 4 | 2 | ||
| 5 | code: u21, | 3 | /// `CodePoint` represents a Unicode code point by its code, |
| 6 | len: u3, | 4 | /// length, and offset in the source bytes. |
| 7 | offset: usize, | 5 | pub const CodePoint = struct { |
| 8 | 6 | code: u21, | |
| 9 | const CodePoint = @This(); | 7 | len: u3, |
| 8 | offset: u32, | ||
| 9 | }; | ||
| 10 | 10 | ||
| 11 | /// `CodePointIterator` iterates a string one `CodePoint` at-a-time. | 11 | /// `Iterator` iterates a string one `CodePoint` at-a-time. |
| 12 | pub const CodePointIterator = struct { | 12 | pub const Iterator = struct { |
| 13 | bytes: []const u8, | 13 | bytes: []const u8, |
| 14 | i: usize = 0, | 14 | i: u32 = 0, |
| 15 | 15 | ||
| 16 | pub fn next(self: *CodePointIterator) ?CodePoint { | 16 | pub fn next(self: *Iterator) ?CodePoint { |
| 17 | if (self.i >= self.bytes.len) return null; | 17 | if (self.i >= self.bytes.len) return null; |
| 18 | 18 | ||
| 19 | if (self.bytes[self.i] < 128) { | 19 | if (self.bytes[self.i] < 128) { |
| 20 | // ASCII fast path | 20 | // ASCII fast path |
| 21 | self.i += 1; | 21 | defer self.i += 1; |
| 22 | |||
| 22 | return .{ | 23 | return .{ |
| 23 | .code = self.bytes[self.i - 1], | 24 | .code = self.bytes[self.i], |
| 24 | .len = 1, | 25 | .len = 1, |
| 25 | .offset = self.i - 1, | 26 | .offset = self.i, |
| 26 | }; | 27 | }; |
| 27 | } | 28 | } |
| 28 | 29 | ||
| @@ -33,12 +34,12 @@ pub const CodePointIterator = struct { | |||
| 33 | 0b1110_0000...0b1110_1111 => 3, | 34 | 0b1110_0000...0b1110_1111 => 3, |
| 34 | 0b1111_0000...0b1111_0111 => 4, | 35 | 0b1111_0000...0b1111_0111 => 4, |
| 35 | else => { | 36 | else => { |
| 36 | self.i += 1; | 37 | defer self.i += 1; |
| 37 | // Unicode replacement code point. | 38 | // Unicode replacement code point. |
| 38 | return .{ | 39 | return .{ |
| 39 | .code = 0xfffd, | 40 | .code = 0xfffd, |
| 40 | .len = 1, | 41 | .len = 1, |
| 41 | .offset = self.i - 1, | 42 | .offset = self.i, |
| 42 | }; | 43 | }; |
| 43 | }, | 44 | }, |
| 44 | }, | 45 | }, |
| @@ -66,15 +67,15 @@ pub const CodePointIterator = struct { | |||
| 66 | return cp; | 67 | return cp; |
| 67 | } | 68 | } |
| 68 | 69 | ||
| 69 | pub fn peek(self: *CodePointIterator) ?CodePoint { | 70 | pub fn peek(self: *Iterator) ?CodePoint { |
| 70 | const saved_i = self.i; | 71 | const saved_i = self.i; |
| 71 | defer self.i = saved_i; | 72 | defer self.i = saved_i; |
| 72 | return self.next(); | 73 | return self.next(); |
| 73 | } | 74 | } |
| 74 | }; | 75 | }; |
| 75 | 76 | ||
| 76 | test "CodePointIterator peek" { | 77 | test "peek" { |
| 77 | var iter = CodePointIterator{ .bytes = "Hi" }; | 78 | var iter = Iterator{ .bytes = "Hi" }; |
| 78 | 79 | ||
| 79 | try std.testing.expectEqual(@as(u21, 'H'), iter.next().?.code); | 80 | try std.testing.expectEqual(@as(u21, 'H'), iter.next().?.code); |
| 80 | try std.testing.expectEqual(@as(u21, 'i'), iter.peek().?.code); | 81 | try std.testing.expectEqual(@as(u21, 'i'), iter.peek().?.code); |