From 1404c85f513a88bbd399ab9f3453da71e7478727 Mon Sep 17 00:00:00 2001 From: Jose Colon Rodriguez Date: Sun, 18 Feb 2024 08:48:03 -0400 Subject: Code point and grapheme are now namespaces. --- src/CodePoint.zig | 84 ------------------------------------------------------- 1 file changed, 84 deletions(-) delete mode 100644 src/CodePoint.zig (limited to 'src/CodePoint.zig') diff --git a/src/CodePoint.zig b/src/CodePoint.zig deleted file mode 100644 index 62dd793..0000000 --- a/src/CodePoint.zig +++ /dev/null @@ -1,84 +0,0 @@ -//! `CodePoint` represents a Unicode code point by its code, length, and offset in the source bytes. - -const std = @import("std"); - -code: u21, -len: u3, -offset: usize, - -const CodePoint = @This(); - -/// `CodePointIterator` iterates a string one `CodePoint` at-a-time. -pub const CodePointIterator = struct { - bytes: []const u8, - i: usize = 0, - - pub fn next(self: *CodePointIterator) ?CodePoint { - if (self.i >= self.bytes.len) return null; - - if (self.bytes[self.i] < 128) { - // ASCII fast path - self.i += 1; - return .{ - .code = self.bytes[self.i - 1], - .len = 1, - .offset = self.i - 1, - }; - } - - var cp = CodePoint{ - .code = undefined, - .len = switch (self.bytes[self.i]) { - 0b1100_0000...0b1101_1111 => 2, - 0b1110_0000...0b1110_1111 => 3, - 0b1111_0000...0b1111_0111 => 4, - else => { - self.i += 1; - // Unicode replacement code point. - return .{ - .code = 0xfffd, - .len = 1, - .offset = self.i - 1, - }; - }, - }, - .offset = self.i, - }; - - const cp_bytes = self.bytes[self.i..][0..cp.len]; - self.i += cp.len; - - cp.code = switch (cp.len) { - 2 => (@as(u21, (cp_bytes[0] & 0b00011111)) << 6) | (cp_bytes[1] & 0b00111111), - - 3 => (((@as(u21, (cp_bytes[0] & 0b00001111)) << 6) | - (cp_bytes[1] & 0b00111111)) << 6) | - (cp_bytes[2] & 0b00111111), - - 4 => (((((@as(u21, (cp_bytes[0] & 0b00000111)) << 6) | - (cp_bytes[1] & 0b00111111)) << 6) | - (cp_bytes[2] & 0b00111111)) << 6) | - (cp_bytes[3] & 0b00111111), - - else => @panic("CodePointIterator.next invalid code point length."), - }; - - return cp; - } - - pub fn peek(self: *CodePointIterator) ?CodePoint { - const saved_i = self.i; - defer self.i = saved_i; - return self.next(); - } -}; - -test "CodePointIterator peek" { - var iter = CodePointIterator{ .bytes = "Hi" }; - - try std.testing.expectEqual(@as(u21, 'H'), iter.next().?.code); - try std.testing.expectEqual(@as(u21, 'i'), iter.peek().?.code); - try std.testing.expectEqual(@as(u21, 'i'), iter.next().?.code); - try std.testing.expectEqual(@as(?CodePoint, null), iter.peek()); - try std.testing.expectEqual(@as(?CodePoint, null), iter.next()); -} -- cgit v1.2.3