diff options
| author | 2024-02-18 11:14:43 -0400 | |
|---|---|---|
| committer | 2024-02-18 11:14:43 -0400 | |
| commit | 6c7da0b526959840240177c0defb680e76fecad6 (patch) | |
| tree | 78c426747ebd23f1e0034798f29c37d1a5893826 /src/cp2.zig | |
| parent | Rename to zg (diff) | |
| download | zg-6c7da0b526959840240177c0defb680e76fecad6.tar.gz zg-6c7da0b526959840240177c0defb680e76fecad6.tar.xz zg-6c7da0b526959840240177c0defb680e76fecad6.zip | |
Testing Ghostty's Utf8Decoder. A bit slower
Diffstat (limited to 'src/cp2.zig')
| -rw-r--r-- | src/cp2.zig | 69 |
1 files changed, 69 insertions, 0 deletions
diff --git a/src/cp2.zig b/src/cp2.zig new file mode 100644 index 0000000..ae0f9da --- /dev/null +++ b/src/cp2.zig | |||
| @@ -0,0 +1,69 @@ | |||
| 1 | const std = @import("std"); | ||
| 2 | |||
| 3 | const Utf8Decoder = @import("Utf8Decoder.zig"); | ||
| 4 | |||
/// A single Unicode code point together with where it was found:
/// its scalar value, how many source bytes encode it, and the byte
/// offset at which that encoding starts.
pub const CodePoint = struct {
    /// The code point's numeric value.
    code: u21,
    /// Number of source bytes occupied by this code point.
    len: u3,
    /// Byte index into the source where this code point begins.
    offset: u32,
};
| 12 | |||
/// `Iterator` iterates a string one `CodePoint` at-a-time.
///
/// Bytes below 128 are returned directly without touching the decoder;
/// every other byte is fed through `Utf8Decoder` until it yields a code
/// point. Iteration ends (returns `null`) once `i` reaches `bytes.len`.
pub const Iterator = struct {
    /// Source bytes being iterated.
    bytes: []const u8,
    /// Decoder used for every non-ASCII sequence.
    decoder: Utf8Decoder = .{},
    /// Byte index of the next code point to decode.
    i: u32 = 0,

    /// Returns the next `CodePoint` and advances past it, or `null` when
    /// all bytes have been consumed.
    ///
    /// If the input ends in the middle of a multi-byte sequence, the
    /// remaining bytes are reported as a single U+FFFD replacement
    /// character instead of reaching `unreachable`.
    pub fn next(self: *Iterator) ?CodePoint {
        if (self.i >= self.bytes.len) return null;

        const start = self.i;
        const first = self.bytes[start];

        if (first < 128) {
            // ASCII fast path
            self.i += 1;
            return .{
                .code = first,
                .len = 1,
                .offset = start,
            };
        }

        for (self.bytes[start..], 1..) |b, len| {
            var consumed = false;
            while (!consumed) {
                const res = self.decoder.next(b);
                consumed = res[1];

                if (res[0]) |code| {
                    // When the decoder yields a code point without having
                    // consumed `b`, that byte belongs to the *next* code
                    // point and must not be counted here, or it would be
                    // silently dropped. `len == 1` is guarded so that the
                    // iterator always makes forward progress.
                    // NOTE(review): assumes the decoder is back in its
                    // initial state whenever it yields a code point —
                    // confirm against Utf8Decoder.
                    const used = if (consumed or len == 1) len else len - 1;
                    self.i += @intCast(used);

                    return .{
                        .code = code,
                        .len = @intCast(used),
                        .offset = start,
                    };
                }
            }
        }

        // The input ended before the decoder produced a code point
        // (truncated multi-byte sequence). Drop the partial decoder state
        // so later calls start clean, and report the leftover bytes as one
        // replacement character.
        const rest = self.bytes.len - start;
        self.decoder = .{};
        self.i += @intCast(rest);

        return .{
            .code = 0xFFFD,
            .len = @intCast(rest),
            .offset = start,
        };
    }

    /// Returns what `next` would return without advancing the iterator.
    /// Both the position and the decoder state are restored afterwards.
    pub fn peek(self: *Iterator) ?CodePoint {
        const saved_i = self.i;
        const saved_decoder = self.decoder;
        defer {
            self.i = saved_i;
            self.decoder = saved_decoder;
        }
        return self.next();
    }
};
| 59 | |||
test "peek" {
    const expectEqual = std.testing.expectEqual;
    const no_code_point: ?CodePoint = null;

    var it: Iterator = .{ .bytes = "Hi" };

    // Consuming the first code point moves the iterator forward.
    const first = it.next();
    try expectEqual(@as(u21, 'H'), first.?.code);

    // Peeking reports the upcoming code point but does not consume it,
    // so the following `next` sees the same one.
    const peeked = it.peek();
    try expectEqual(@as(u21, 'i'), peeked.?.code);
    const second = it.next();
    try expectEqual(@as(u21, 'i'), second.?.code);

    // At the end of input both operations yield null.
    try expectEqual(no_code_point, it.peek());
    try expectEqual(no_code_point, it.next());
}