From e3082e64b3ab8a8aa0777d63be69eb8b6d50a654 Mon Sep 17 00:00:00 2001
From: Sam Atman
Date: Tue, 8 Jul 2025 12:12:20 -0400
Subject: Add Words.zig example to README

---
 src/Words.zig | 17 +++++++++++++++++
 src/code_point.zig | 3 +++
 2 files changed, 20 insertions(+)

(limited to 'src')

diff --git a/src/Words.zig b/src/Words.zig
index af82562..617c34d 100644
--- a/src/Words.zig
+++ b/src/Words.zig
@@ -674,6 +674,23 @@ test "ext_pict" {
     try testing.expect(ext_pict.isMatch("\u{2701}"));
 }
 
+test "Words" {
+    const wb = try Words.init(testing.allocator);
+    defer wb.deinit(testing.allocator);
+    const word_str = "Metonym Μετωνύμιο メトニム";
+    var w_iter = wb.iterator(word_str);
+    try testing.expectEqualStrings("Metonym", w_iter.next().?.bytes(word_str));
+    // Spaces are "words" too!
+    try testing.expectEqualStrings(" ", w_iter.next().?.bytes(word_str));
+    const in_greek = w_iter.next().?;
+    for (in_greek.offset..in_greek.offset + in_greek.len) |i| {
+        const at_index = wb.wordAtIndex(word_str, i).bytes(word_str);
+        try testing.expectEqualStrings("Μετωνύμιο", at_index);
+    }
+    _ = w_iter.next();
+    try testing.expectEqualStrings("メトニム", w_iter.next().?.bytes(word_str));
+}
+
 test wordAtIndex {
     const wb = try Words.init(testing.allocator);
     defer wb.deinit(testing.allocator);
diff --git a/src/code_point.zig b/src/code_point.zig
index 16648af..7a638af 100644
--- a/src/code_point.zig
+++ b/src/code_point.zig
@@ -121,6 +121,9 @@ pub fn decodeAtCursor(bytes: []const u8, cursor: *uoffset) ?CodePoint {
     }
     if (st == RUNE_REJECT or cursor.* == bytes.len) {
         @branchHint(.cold);
+        // This, and the branch below, detect truncation, the
+        // only invalid state handled differently by the Maximal
+        // Subparts algorithm.
         if (state_dfa[@intCast(u8dfa[byte])] == RUNE_REJECT) {
             cursor.* -= 2; // +1
             return .{
-- 
cgit v1.2.3