summaryrefslogtreecommitdiff
path: root/src/CodePoint.zig
diff options
context:
space:
mode:
Diffstat (limited to 'src/CodePoint.zig')
-rw-r--r--src/CodePoint.zig84
1 files changed, 0 insertions, 84 deletions
diff --git a/src/CodePoint.zig b/src/CodePoint.zig
deleted file mode 100644
index 62dd793..0000000
--- a/src/CodePoint.zig
+++ /dev/null
@@ -1,84 +0,0 @@
1//! `CodePoint` represents a Unicode code point by its code, length, and offset in the source bytes.
2
3const std = @import("std");
4
/// The decoded code point value (U+FFFD when the source bytes could not be decoded).
code: u21,
/// Number of source bytes this code point consumed (1 to 4).
len: u3,
/// Byte index in the source slice where this code point starts.
offset: usize,

const CodePoint = @This();

/// `CodePointIterator` iterates a string one `CodePoint` at-a-time.
///
/// Decoding is lenient: the iterator never fails. A byte that cannot start a
/// sequence, or a lead byte whose sequence is cut off by the end of the
/// input, yields U+FFFD with `len == 1`, and iteration resumes at the next
/// byte. Continuation bytes are masked but not validated.
pub const CodePointIterator = struct {
    bytes: []const u8,
    i: usize = 0,

    /// Decodes the code point at the current position and advances past it.
    /// Returns `null` once every byte has been consumed.
    pub fn next(self: *CodePointIterator) ?CodePoint {
        if (self.i >= self.bytes.len) return null;

        const start = self.i;
        const lead = self.bytes[start];

        if (lead < 128) {
            // ASCII fast path
            self.i += 1;
            return .{
                .code = lead,
                .len = 1,
                .offset = start,
            };
        }

        // The lead byte announces how long the sequence claims to be.
        const seq_len: u3 = switch (lead) {
            0b1100_0000...0b1101_1111 => 2,
            0b1110_0000...0b1110_1111 => 3,
            0b1111_0000...0b1111_0111 => 4,
            // Stray continuation byte or a byte that is never valid as a lead.
            else => return self.replacement(),
        };

        // A sequence truncated by the end of the input must not be sliced:
        // that would index out of bounds. `start < self.bytes.len` holds
        // here, so the subtraction cannot underflow.
        if (seq_len > self.bytes.len - start) return self.replacement();

        const cp_bytes = self.bytes[start..][0..seq_len];
        self.i += seq_len;

        const value: u21 = switch (seq_len) {
            2 => (@as(u21, (cp_bytes[0] & 0b00011111)) << 6) | (cp_bytes[1] & 0b00111111),

            3 => (((@as(u21, (cp_bytes[0] & 0b00001111)) << 6) |
                (cp_bytes[1] & 0b00111111)) << 6) |
                (cp_bytes[2] & 0b00111111),

            4 => (((((@as(u21, (cp_bytes[0] & 0b00000111)) << 6) |
                (cp_bytes[1] & 0b00111111)) << 6) |
                (cp_bytes[2] & 0b00111111)) << 6) |
                (cp_bytes[3] & 0b00111111),

            else => @panic("CodePointIterator.next invalid code point length."),
        };

        return .{
            .code = value,
            .len = seq_len,
            .offset = start,
        };
    }

    /// Returns the code point `next` would produce, without advancing.
    pub fn peek(self: *CodePointIterator) ?CodePoint {
        const saved_i = self.i;
        defer self.i = saved_i;
        return self.next();
    }

    /// Consumes exactly one byte and reports it as the Unicode replacement
    /// code point (U+FFFD).
    fn replacement(self: *CodePointIterator) CodePoint {
        const at = self.i;
        self.i += 1;
        return .{
            .code = 0xfffd,
            .len = 1,
            .offset = at,
        };
    }
};
75
test "CodePointIterator peek" {
    const expectEqual = std.testing.expectEqual;
    const exhausted: ?CodePoint = null;

    var it = CodePointIterator{ .bytes = "Hi" };

    // Consuming the first code point moves the cursor onto 'i'.
    const first = it.next().?;
    try expectEqual(@as(u21, 'H'), first.code);

    // Peeking reports 'i' but leaves it in place for the following `next`.
    const peeked = it.peek().?;
    try expectEqual(@as(u21, 'i'), peeked.code);
    const second = it.next().?;
    try expectEqual(@as(u21, 'i'), second.code);

    // Once the input is used up, both `peek` and `next` report null.
    try expectEqual(exhausted, it.peek());
    try expectEqual(exhausted, it.next());
}