diff options
| author | 2024-02-17 09:50:50 -0400 | |
|---|---|---|
| committer | 2024-02-17 09:50:50 -0400 | |
| commit | 6c1a88471fc6444ee93d6ca0c64d0953a0d857ac (patch) | |
| tree | c9ac886559bd1117b75482ab690364a5e792ad2c /src/CodePoint.zig | |
| parent | isAsciiOnly SIMD tweaks (diff) | |
| download | zg-6c1a88471fc6444ee93d6ca0c64d0953a0d857ac.tar.gz zg-6c1a88471fc6444ee93d6ca0c64d0953a0d857ac.tar.xz zg-6c1a88471fc6444ee93d6ca0c64d0953a0d857ac.zip | |
GraphemeIterator ASCII optimization 3x faster
Diffstat (limited to 'src/CodePoint.zig')
| -rw-r--r-- | src/CodePoint.zig | 27 |
1 file changed, 15 insertions, 12 deletions
```diff
diff --git a/src/CodePoint.zig b/src/CodePoint.zig
index c03ecac..1c1bec1 100644
--- a/src/CodePoint.zig
+++ b/src/CodePoint.zig
```
| @@ -18,26 +18,29 @@ pub const CodePointIterator = struct { | |||
| 18 | 18 | ||
| 19 | if (self.bytes[self.i] < 128) { | 19 | if (self.bytes[self.i] < 128) { |
| 20 | // ASCII fast path | 20 | // ASCII fast path |
| 21 | const cp = CodePoint{ | 21 | defer self.i += 1; |
| 22 | return .{ | ||
| 22 | .code = self.bytes[self.i], | 23 | .code = self.bytes[self.i], |
| 23 | .len = 1, | 24 | .len = 1, |
| 24 | .offset = self.i, | 25 | .offset = self.i, |
| 25 | }; | 26 | }; |
| 26 | |||
| 27 | self.i += 1; | ||
| 28 | |||
| 29 | return cp; | ||
| 30 | } | 27 | } |
| 31 | 28 | ||
| 32 | var cp = CodePoint{ | 29 | var cp = CodePoint{ |
| 33 | .code = undefined, | 30 | .code = undefined, |
| 34 | .len = blk: { | 31 | .len = switch (self.bytes[self.i]) { |
| 35 | break :blk switch (self.bytes[self.i]) { | 32 | 0b1100_0000...0b1101_1111 => 2, |
| 36 | 0b1100_0000...0b1101_1111 => 2, | 33 | 0b1110_0000...0b1110_1111 => 3, |
| 37 | 0b1110_0000...0b1110_1111 => 3, | 34 | 0b1111_0000...0b1111_0111 => 4, |
| 38 | 0b1111_0000...0b1111_0111 => 4, | 35 | else => { |
| 39 | else => @panic("CodePointIterator.next: Ivalid code point start byte."), | 36 | self.i += 1; |
| 40 | }; | 37 | // Unicode replacement code point. |
| 38 | return .{ | ||
| 39 | .code = 0xfffd, | ||
| 40 | .len = 1, | ||
| 41 | .offset = self.i - 1, | ||
| 42 | }; | ||
| 43 | }, | ||
| 41 | }, | 44 | }, |
| 42 | .offset = self.i, | 45 | .offset = self.i, |
| 43 | }; | 46 | }; |