summary | refs | log | tree | commit | diff
path: root/src/code_point.zig
diff options
context:
space:
mode:
author: Jose Colon Rodriguez, 2024-02-18 11:21:49 -0400
committer: Jose Colon Rodriguez, 2024-02-18 11:21:49 -0400
commit: 08be45bfeb85bc809a492b9d0147052a028dd8ec (patch)
tree: e91eda437902090e3bde5fafbdfb92f2db369b7c /src/code_point.zig
parent: Testing Ghostty's Utf8Decoder. A bit slower (diff)
download: zg-08be45bfeb85bc809a492b9d0147052a028dd8ec.tar.gz
zg-08be45bfeb85bc809a492b9d0147052a028dd8ec.tar.xz
zg-08be45bfeb85bc809a492b9d0147052a028dd8ec.zip
Back to zg code_point. 4ms faster than Ghostty's Utf8Decoder
Diffstat (limited to 'src/code_point.zig')
-rw-r--r-- src/code_point.zig 68
1 file changed, 39 insertions, 29 deletions
diff --git a/src/code_point.zig b/src/code_point.zig
index 098e635..ac37562 100644
--- a/src/code_point.zig
+++ b/src/code_point.zig
@@ -3,29 +3,9 @@ const std = @import("std");
3/// `CodePoint` represents a Unicode code point by its code, 3/// `CodePoint` represents a Unicode code point by its code,
4/// length, and offset in the source bytes. 4/// length, and offset in the source bytes.
5pub const CodePoint = struct { 5pub const CodePoint = struct {
6 code: u21,
6 len: u3, 7 len: u3,
7 offset: u32, 8 offset: u32,
8
9 pub fn code(self: CodePoint, src: []const u8) u21 {
10 const cp_bytes = src[self.offset..][0..self.len];
11
12 return switch (self.len) {
13 1 => cp_bytes[0],
14
15 2 => (@as(u21, (cp_bytes[0] & 0b00011111)) << 6) | (cp_bytes[1] & 0b00111111),
16
17 3 => (((@as(u21, (cp_bytes[0] & 0b00001111)) << 6) |
18 (cp_bytes[1] & 0b00111111)) << 6) |
19 (cp_bytes[2] & 0b00111111),
20
21 4 => (((((@as(u21, (cp_bytes[0] & 0b00000111)) << 6) |
22 (cp_bytes[1] & 0b00111111)) << 6) |
23 (cp_bytes[2] & 0b00111111)) << 6) |
24 (cp_bytes[3] & 0b00111111),
25
26 else => @panic("code_point.CodePoint.code: Invalid code point length."),
27 };
28 }
29}; 9};
30 10
31/// `Iterator` iterates a string one `CodePoint` at-a-time. 11/// `Iterator` iterates a string one `CodePoint` at-a-time.
@@ -39,20 +19,51 @@ pub const Iterator = struct {
39 if (self.bytes[self.i] < 128) { 19 if (self.bytes[self.i] < 128) {
40 // ASCII fast path 20 // ASCII fast path
41 defer self.i += 1; 21 defer self.i += 1;
42 return .{ .len = 1, .offset = self.i }; 22
23 return .{
24 .code = self.bytes[self.i],
25 .len = 1,
26 .offset = self.i,
27 };
43 } 28 }
44 29
45 const cp = CodePoint{ 30 var cp = CodePoint{
31 .code = undefined,
46 .len = switch (self.bytes[self.i]) { 32 .len = switch (self.bytes[self.i]) {
47 0b1100_0000...0b1101_1111 => 2, 33 0b1100_0000...0b1101_1111 => 2,
48 0b1110_0000...0b1110_1111 => 3, 34 0b1110_0000...0b1110_1111 => 3,
49 0b1111_0000...0b1111_0111 => 4, 35 0b1111_0000...0b1111_0111 => 4,
50 else => @panic("code_point.Iterator.next: Invalid start byte."), 36 else => {
37 defer self.i += 1;
38 // Unicode replacement code point.
39 return .{
40 .code = 0xfffd,
41 .len = 1,
42 .offset = self.i,
43 };
44 },
51 }, 45 },
52 .offset = self.i, 46 .offset = self.i,
53 }; 47 };
54 48
49 const cp_bytes = self.bytes[self.i..][0..cp.len];
55 self.i += cp.len; 50 self.i += cp.len;
51
52 cp.code = switch (cp.len) {
53 2 => (@as(u21, (cp_bytes[0] & 0b00011111)) << 6) | (cp_bytes[1] & 0b00111111),
54
55 3 => (((@as(u21, (cp_bytes[0] & 0b00001111)) << 6) |
56 (cp_bytes[1] & 0b00111111)) << 6) |
57 (cp_bytes[2] & 0b00111111),
58
59 4 => (((((@as(u21, (cp_bytes[0] & 0b00000111)) << 6) |
60 (cp_bytes[1] & 0b00111111)) << 6) |
61 (cp_bytes[2] & 0b00111111)) << 6) |
62 (cp_bytes[3] & 0b00111111),
63
64 else => @panic("CodePointIterator.next invalid code point length."),
65 };
66
56 return cp; 67 return cp;
57 } 68 }
58 69
@@ -64,12 +75,11 @@ pub const Iterator = struct {
64}; 75};
65 76
66test "peek" { 77test "peek" {
67 const src = "Hi"; 78 var iter = Iterator{ .bytes = "Hi" };
68 var iter = Iterator{ .bytes = src };
69 79
70 try std.testing.expectEqual(@as(u21, 'H'), iter.next().?.code(src)); 80 try std.testing.expectEqual(@as(u21, 'H'), iter.next().?.code);
71 try std.testing.expectEqual(@as(u21, 'i'), iter.peek().?.code(src)); 81 try std.testing.expectEqual(@as(u21, 'i'), iter.peek().?.code);
72 try std.testing.expectEqual(@as(u21, 'i'), iter.next().?.code(src)); 82 try std.testing.expectEqual(@as(u21, 'i'), iter.next().?.code);
73 try std.testing.expectEqual(@as(?CodePoint, null), iter.peek()); 83 try std.testing.expectEqual(@as(?CodePoint, null), iter.peek());
74 try std.testing.expectEqual(@as(?CodePoint, null), iter.next()); 84 try std.testing.expectEqual(@as(?CodePoint, null), iter.next());
75} 85}