From 71f3570ab9259254b70a8a91b389ed698a33a264 Mon Sep 17 00:00:00 2001 From: Tim Culverhouse Date: Mon, 10 Jun 2024 13:48:15 -0500 Subject: codepoint: prevent panic when last cp too short If the last codepoint in a byte slice is incomplete (i.e., has a length of 3 but there are only 2 bytes remaining), the iterator will panic. Instead of panicking, prefer to return a replacement character. This strategy is similar to that in the block just above which returns a replacement character if the first byte is not valid. In this latter block, we also consume only one byte and allow the iterator to continue. This allows for sections of text which may have a single byte incorrect near the end of the slice. --- src/code_point.zig | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src') diff --git a/src/code_point.zig b/src/code_point.zig index ac37562..2f2e80f 100644 --- a/src/code_point.zig +++ b/src/code_point.zig @@ -46,6 +46,17 @@ pub const Iterator = struct { .offset = self.i, }; + // Return replacement if we don't have a complete codepoint remaining. Consumes only one byte + if (self.i + cp.len > self.bytes.len) { + defer self.i += 1; + // Unicode replacement code point. + return .{ + .code = 0xfffd, + .len = 1, + .offset = self.i, + }; + } + const cp_bytes = self.bytes[self.i..][0..cp.len]; self.i += cp.len; -- cgit v1.2.3