diff options
| author | 2024-02-18 11:21:49 -0400 | |
|---|---|---|
| committer | 2024-02-18 11:21:49 -0400 | |
| commit | 08be45bfeb85bc809a492b9d0147052a028dd8ec (patch) | |
| tree | e91eda437902090e3bde5fafbdfb92f2db369b7c /src/cp2.zig | |
| parent | Testing Ghostty's Utf8Decoder. A bit slower (diff) | |
| download | zg-08be45bfeb85bc809a492b9d0147052a028dd8ec.tar.gz zg-08be45bfeb85bc809a492b9d0147052a028dd8ec.tar.xz zg-08be45bfeb85bc809a492b9d0147052a028dd8ec.zip | |
Back to zg code_point. 4ms faster than Ghostty's Utf8Decoder
Diffstat (limited to 'src/cp2.zig')
| -rw-r--r-- | src/cp2.zig | 69 |
1 files changed, 0 insertions, 69 deletions
diff --git a/src/cp2.zig b/src/cp2.zig deleted file mode 100644 index ae0f9da..0000000 --- a/src/cp2.zig +++ /dev/null | |||
| @@ -1,69 +0,0 @@ | |||
| 1 | const std = @import("std"); | ||
| 2 | |||
| 3 | const Utf8Decoder = @import("Utf8Decoder.zig"); | ||
| 4 | |||
/// A single decoded Unicode code point together with where it was found
/// in the source bytes.
pub const CodePoint = struct {
    /// The code point's scalar value.
    code: u21,
    /// Number of source bytes this code point occupies.
    len: u3,
    /// Byte offset of the code point's first byte in the source.
    offset: u32,
};
| 12 | |||
/// `Iterator` iterates a string one `CodePoint` at-a-time.
///
/// Bytes below 128 are returned directly as one-byte code points. Any
/// other byte is fed through `decoder` until it emits a code point.
/// If `bytes` ends before the decoder emits anything, the leftover bytes
/// are reported as a single U+FFFD code point rather than hitting
/// `unreachable`.
pub const Iterator = struct {
    /// The bytes being iterated. Only read, never written.
    bytes: []const u8,
    /// Incremental decoder used for every non-ASCII byte.
    decoder: Utf8Decoder = .{},
    /// Offset into `bytes` at which the next code point starts.
    i: u32 = 0,

    /// Code point reported for a sequence cut short by the end of `bytes`.
    const replacement: u21 = 0xFFFD;

    /// Returns the next code point and advances past it, or `null` once
    /// every byte has been consumed.
    pub fn next(self: *Iterator) ?CodePoint {
        if (self.i >= self.bytes.len) return null;

        const start = self.i;
        const first = self.bytes[start];

        if (first < 128) {
            // ASCII fast path
            self.i += 1;
            return .{ .code = first, .len = 1, .offset = start };
        }

        for (self.bytes[start..], 1..) |b, len| {
            var consumed = false;
            while (!consumed) {
                const res = self.decoder.next(b);
                consumed = res[1];

                if (res[0]) |code| {
                    // When the decoder emits a code point without
                    // consuming `b`, that byte is not part of the code
                    // point, so leave it for the following call instead
                    // of skipping it. The `len == 1` guard guarantees
                    // forward progress.
                    // NOTE(review): assumes the decoder is back in its
                    // initial state whenever it emits a code point, as
                    // Ghostty's decoder documents; confirm against
                    // Utf8Decoder.zig.
                    const used = if (consumed or len == 1) len else len - 1;
                    self.i += @intCast(used);

                    return .{
                        .code = code,
                        .len = @intCast(used),
                        .offset = start,
                    };
                }
            }
        }

        // The input ended in the middle of a sequence. Discard the partial
        // decoder state and report the leftover bytes as one replacement
        // character so iteration terminates cleanly.
        // NOTE(review): the cast assumes the decoder never buffers more
        // than 7 bytes without emitting; confirm against Utf8Decoder.zig.
        const rest: u3 = @intCast(self.bytes.len - start);
        self.decoder = .{};
        self.i += rest;

        return .{
            .code = replacement,
            .len = rest,
            .offset = start,
        };
    }

    /// Returns what `next` would return without advancing the iterator.
    pub fn peek(self: *Iterator) ?CodePoint {
        const saved_i = self.i;
        // The decoder is restored along with the offset so that peeking
        // leaves no trace in the iterator.
        const saved_decoder = self.decoder;
        defer {
            self.i = saved_i;
            self.decoder = saved_decoder;
        }
        return self.next();
    }
};
| 59 | |||
test "peek" {
    const expectEqual = std.testing.expectEqual;

    var it: Iterator = .{ .bytes = "Hi" };

    // Consume 'H'.
    try expectEqual(@as(u21, 'H'), it.next().?.code);

    // Peeking at 'i' must not consume it: the following `next` sees it too.
    try expectEqual(@as(u21, 'i'), it.peek().?.code);
    try expectEqual(@as(u21, 'i'), it.next().?.code);

    // Once exhausted, both `peek` and `next` report null.
    try expectEqual(@as(?CodePoint, null), it.peek());
    try expectEqual(@as(?CodePoint, null), it.next());
}