summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
authorGravatar Sam Atman2025-07-08 12:12:20 -0400
committerGravatar Sam Atman2025-07-08 12:12:20 -0400
commite3082e64b3ab8a8aa0777d63be69eb8b6d50a654 (patch)
tree2607c185fd8053b84d60041fadc35c05a0225d34 /src
parentAdd graphemeAtIndex + iterate before and after (diff)
downloadzg-e3082e64b3ab8a8aa0777d63be69eb8b6d50a654.tar.gz
zg-e3082e64b3ab8a8aa0777d63be69eb8b6d50a654.tar.xz
zg-e3082e64b3ab8a8aa0777d63be69eb8b6d50a654.zip
Add Words.zig example to README
Diffstat (limited to 'src')
-rw-r--r-- src/Words.zig 17
-rw-r--r-- src/code_point.zig 3
2 files changed, 20 insertions, 0 deletions
diff --git a/src/Words.zig b/src/Words.zig
index af82562..617c34d 100644
--- a/src/Words.zig
+++ b/src/Words.zig
@@ -674,6 +674,23 @@ test "ext_pict" {
674 try testing.expect(ext_pict.isMatch("\u{2701}")); 674 try testing.expect(ext_pict.isMatch("\u{2701}"));
675} 675}
676 676
test "Words" {
    const words = try Words.init(testing.allocator);
    defer words.deinit(testing.allocator);

    // One sample containing three scripts, separated by single spaces.
    const text = "Metonym Μετωνύμιο メトニム";
    var iter = words.iterator(text);

    // The iterator yields the leading Latin-script word first.
    const latin = iter.next().?;
    try testing.expectEqualStrings("Metonym", latin.bytes(text));

    // The separating space is yielded as a word of its own.
    const first_gap = iter.next().?;
    try testing.expectEqualStrings(" ", first_gap.bytes(text));

    // Every index in the Greek word's `offset..offset + len` span must
    // resolve, through `wordAtIndex`, to that same word.
    const greek = iter.next().?;
    const greek_end = greek.offset + greek.len;
    for (greek.offset..greek_end) |idx| {
        const found = words.wordAtIndex(text, idx);
        try testing.expectEqualStrings("Μετωνύμιο", found.bytes(text));
    }

    // Step past the next item (the second separator) without inspecting it.
    _ = iter.next();

    const katakana = iter.next().?;
    try testing.expectEqualStrings("メトニム", katakana.bytes(text));
}
693
677test wordAtIndex { 694test wordAtIndex {
678 const wb = try Words.init(testing.allocator); 695 const wb = try Words.init(testing.allocator);
679 defer wb.deinit(testing.allocator); 696 defer wb.deinit(testing.allocator);
diff --git a/src/code_point.zig b/src/code_point.zig
index 16648af..7a638af 100644
--- a/src/code_point.zig
+++ b/src/code_point.zig
@@ -121,6 +121,9 @@ pub fn decodeAtCursor(bytes: []const u8, cursor: *uoffset) ?CodePoint {
121 } 121 }
122 if (st == RUNE_REJECT or cursor.* == bytes.len) { 122 if (st == RUNE_REJECT or cursor.* == bytes.len) {
123 @branchHint(.cold); 123 @branchHint(.cold);
124 // This, and the branch below, detect truncation, the
125 // only invalid state handled differently by the Maximal
126 // Subparts algorithm.
124 if (state_dfa[@intCast(u8dfa[byte])] == RUNE_REJECT) { 127 if (state_dfa[@intCast(u8dfa[byte])] == RUNE_REJECT) {
125 cursor.* -= 2; // +1 128 cursor.* -= 2; // +1
126 return .{ 129 return .{