summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Tim Culverhouse2024-06-10 13:48:15 -0500
committerGravatar Tim Culverhouse2024-06-10 13:52:53 -0500
commit71f3570ab9259254b70a8a91b389ed698a33a264 (patch)
tree44dd6944e087740d402edc9ae28daa5301e7d45f /src
parentbuild: use b.path everywhere (diff)
downloadzg-71f3570ab9259254b70a8a91b389ed698a33a264.tar.gz
zg-71f3570ab9259254b70a8a91b389ed698a33a264.tar.xz
zg-71f3570ab9259254b70a8a91b389ed698a33a264.zip
codepoint: prevent panic when last cp too short
If the last codepoint in a byte slice is incomplete (e.g., its first byte indicates a length of 3 but only 2 bytes remain), the iterator will panic. Instead of panicking, prefer to return a replacement character. This strategy is similar to that of the block just above, which returns a replacement character if the first byte is not valid. In the new block, we also consume only one byte and allow the iterator to continue. This allows for sections of text which may have a single incorrect byte near the end of the slice.
Diffstat (limited to 'src')
-rw-r--r--src/code_point.zig11
1 files changed, 11 insertions, 0 deletions
diff --git a/src/code_point.zig b/src/code_point.zig
index ac37562..2f2e80f 100644
--- a/src/code_point.zig
+++ b/src/code_point.zig
@@ -46,6 +46,17 @@ pub const Iterator = struct {
46 .offset = self.i, 46 .offset = self.i,
47 }; 47 };
48 48
49 // Return replacement if we don' have a complete codepoint remaining. Consumes only one byte
50 if (self.i + cp.len > self.bytes.len) {
51 defer self.i += 1;
52 // Unicode replacement code point.
53 return .{
54 .code = 0xfffd,
55 .len = 1,
56 .offset = self.i,
57 };
58 }
59
49 const cp_bytes = self.bytes[self.i..][0..cp.len]; 60 const cp_bytes = self.bytes[self.i..][0..cp.len];
50 self.i += cp.len; 61 self.i += cp.len;
51 62