summary | refs | log | tree | commit | diff
path: root/src/code_point.zig
diff options
context:
space:
mode:
author: Jose Colon Rodriguez, 2024-02-18 09:20:19 -0400
committer: Jose Colon Rodriguez, 2024-02-18 09:20:19 -0400
commit: f913551d27e07f0a7c7e201ba3141fd3a6cbb47c (patch)
tree: 74bfe0fb0aa98d053b5ab76beec6fdc733026017 /src/code_point.zig
parent: Code point and grapheme are now namespaces. (diff)
download: zg-f913551d27e07f0a7c7e201ba3141fd3a6cbb47c.tar.gz
zg-f913551d27e07f0a7c7e201ba3141fd3a6cbb47c.tar.xz
zg-f913551d27e07f0a7c7e201ba3141fd3a6cbb47c.zip
Code point code is now a method not a field.
Diffstat (limited to '')
-rw-r--r-- src/code_point.zig 68
1 files changed, 29 insertions, 39 deletions
diff --git a/src/code_point.zig b/src/code_point.zig
index ac37562..098e635 100644
--- a/src/code_point.zig
+++ b/src/code_point.zig
@@ -3,9 +3,29 @@ const std = @import("std");
3/// `CodePoint` represents a Unicode code point by its code, 3/// `CodePoint` represents a Unicode code point by its code,
4/// length, and offset in the source bytes. 4/// length, and offset in the source bytes.
5pub const CodePoint = struct { 5pub const CodePoint = struct {
6 code: u21,
7 len: u3, 6 len: u3,
8 offset: u32, 7 offset: u32,
8
9 pub fn code(self: CodePoint, src: []const u8) u21 {
10 const cp_bytes = src[self.offset..][0..self.len];
11
12 return switch (self.len) {
13 1 => cp_bytes[0],
14
15 2 => (@as(u21, (cp_bytes[0] & 0b00011111)) << 6) | (cp_bytes[1] & 0b00111111),
16
17 3 => (((@as(u21, (cp_bytes[0] & 0b00001111)) << 6) |
18 (cp_bytes[1] & 0b00111111)) << 6) |
19 (cp_bytes[2] & 0b00111111),
20
21 4 => (((((@as(u21, (cp_bytes[0] & 0b00000111)) << 6) |
22 (cp_bytes[1] & 0b00111111)) << 6) |
23 (cp_bytes[2] & 0b00111111)) << 6) |
24 (cp_bytes[3] & 0b00111111),
25
26 else => @panic("code_point.CodePoint.code: Invalid code point length."),
27 };
28 }
9}; 29};
10 30
11/// `Iterator` iterates a string one `CodePoint` at-a-time. 31/// `Iterator` iterates a string one `CodePoint` at-a-time.
@@ -19,51 +39,20 @@ pub const Iterator = struct {
19 if (self.bytes[self.i] < 128) { 39 if (self.bytes[self.i] < 128) {
20 // ASCII fast path 40 // ASCII fast path
21 defer self.i += 1; 41 defer self.i += 1;
22 42 return .{ .len = 1, .offset = self.i };
23 return .{
24 .code = self.bytes[self.i],
25 .len = 1,
26 .offset = self.i,
27 };
28 } 43 }
29 44
30 var cp = CodePoint{ 45 const cp = CodePoint{
31 .code = undefined,
32 .len = switch (self.bytes[self.i]) { 46 .len = switch (self.bytes[self.i]) {
33 0b1100_0000...0b1101_1111 => 2, 47 0b1100_0000...0b1101_1111 => 2,
34 0b1110_0000...0b1110_1111 => 3, 48 0b1110_0000...0b1110_1111 => 3,
35 0b1111_0000...0b1111_0111 => 4, 49 0b1111_0000...0b1111_0111 => 4,
36 else => { 50 else => @panic("code_point.Iterator.next: Invalid start byte."),
37 defer self.i += 1;
38 // Unicode replacement code point.
39 return .{
40 .code = 0xfffd,
41 .len = 1,
42 .offset = self.i,
43 };
44 },
45 }, 51 },
46 .offset = self.i, 52 .offset = self.i,
47 }; 53 };
48 54
49 const cp_bytes = self.bytes[self.i..][0..cp.len];
50 self.i += cp.len; 55 self.i += cp.len;
51
52 cp.code = switch (cp.len) {
53 2 => (@as(u21, (cp_bytes[0] & 0b00011111)) << 6) | (cp_bytes[1] & 0b00111111),
54
55 3 => (((@as(u21, (cp_bytes[0] & 0b00001111)) << 6) |
56 (cp_bytes[1] & 0b00111111)) << 6) |
57 (cp_bytes[2] & 0b00111111),
58
59 4 => (((((@as(u21, (cp_bytes[0] & 0b00000111)) << 6) |
60 (cp_bytes[1] & 0b00111111)) << 6) |
61 (cp_bytes[2] & 0b00111111)) << 6) |
62 (cp_bytes[3] & 0b00111111),
63
64 else => @panic("CodePointIterator.next invalid code point length."),
65 };
66
67 return cp; 56 return cp;
68 } 57 }
69 58
@@ -75,11 +64,12 @@ pub const Iterator = struct {
75}; 64};
76 65
77test "peek" { 66test "peek" {
78 var iter = Iterator{ .bytes = "Hi" }; 67 const src = "Hi";
68 var iter = Iterator{ .bytes = src };
79 69
80 try std.testing.expectEqual(@as(u21, 'H'), iter.next().?.code); 70 try std.testing.expectEqual(@as(u21, 'H'), iter.next().?.code(src));
81 try std.testing.expectEqual(@as(u21, 'i'), iter.peek().?.code); 71 try std.testing.expectEqual(@as(u21, 'i'), iter.peek().?.code(src));
82 try std.testing.expectEqual(@as(u21, 'i'), iter.next().?.code); 72 try std.testing.expectEqual(@as(u21, 'i'), iter.next().?.code(src));
83 try std.testing.expectEqual(@as(?CodePoint, null), iter.peek()); 73 try std.testing.expectEqual(@as(?CodePoint, null), iter.peek());
84 try std.testing.expectEqual(@as(?CodePoint, null), iter.next()); 74 try std.testing.expectEqual(@as(?CodePoint, null), iter.next());
85} 75}