diff options
| author | 2024-06-10 13:48:15 -0500 | |
|---|---|---|
| committer | 2024-06-10 13:52:53 -0500 | |
| commit | 71f3570ab9259254b70a8a91b389ed698a33a264 (patch) | |
| tree | 44dd6944e087740d402edc9ae28daa5301e7d45f /src | |
| parent | build: use b.path everywhere (diff) | |
| download | zg-71f3570ab9259254b70a8a91b389ed698a33a264.tar.gz zg-71f3570ab9259254b70a8a91b389ed698a33a264.tar.xz zg-71f3570ab9259254b70a8a91b389ed698a33a264.zip | |
codepoint: prevent panic when last cp too short
If the last codepoint in a byte slice is incomplete (i.e., it has a length
of 3 but there are only 2 bytes remaining), the iterator will panic.
Instead of panicking, prefer to return a replacement character. This
strategy is similar to that in the block just above which returns a
replacement character if the first byte is not valid. As in that block,
we consume only one byte and allow the iterator to continue. This lets
the iterator handle text in which a single byte is incorrect near the
end of the slice.
Diffstat (limited to 'src')
| -rw-r--r-- | src/code_point.zig | 11 |
1 files changed, 11 insertions, 0 deletions
diff --git a/src/code_point.zig b/src/code_point.zig index ac37562..2f2e80f 100644 --- a/src/code_point.zig +++ b/src/code_point.zig | |||
| @@ -46,6 +46,17 @@ pub const Iterator = struct { | |||
| 46 | .offset = self.i, | 46 | .offset = self.i, |
| 47 | }; | 47 | }; |
| 48 | 48 | ||
| 49 | // Return replacement if we don't have a complete codepoint remaining. Consumes only one byte. | |
| 50 | if (self.i + cp.len > self.bytes.len) { | ||
| 51 | defer self.i += 1; | ||
| 52 | // Unicode replacement code point. | ||
| 53 | return .{ | ||
| 54 | .code = 0xfffd, | ||
| 55 | .len = 1, | ||
| 56 | .offset = self.i, | ||
| 57 | }; | ||
| 58 | } | ||
| 59 | |||
| 49 | const cp_bytes = self.bytes[self.i..][0..cp.len]; | 60 | const cp_bytes = self.bytes[self.i..][0..cp.len]; |
| 50 | self.i += cp.len; | 61 | self.i += cp.len; |
| 51 | 62 | ||