diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/Words.zig | 17 | ||||
| -rw-r--r-- | src/code_point.zig | 3 |
2 files changed, 20 insertions, 0 deletions
diff --git a/src/Words.zig b/src/Words.zig index af82562..617c34d 100644 --- a/src/Words.zig +++ b/src/Words.zig | |||
| @@ -674,6 +674,23 @@ test "ext_pict" { | |||
| 674 | try testing.expect(ext_pict.isMatch("\u{2701}")); | 674 | try testing.expect(ext_pict.isMatch("\u{2701}")); |
| 675 | } | 675 | } |
| 676 | 676 | ||
| 677 | test "Words" { | ||
| 678 | const wb = try Words.init(testing.allocator); | ||
| 679 | defer wb.deinit(testing.allocator); | ||
| 680 | const word_str = "Metonym Μετωνύμιο メトニム"; | ||
| 681 | var w_iter = wb.iterator(word_str); | ||
| 682 | try testing.expectEqualStrings("Metonym", w_iter.next().?.bytes(word_str)); | ||
| 683 | // Spaces are "words" too: the iterator yields the separating " " as its own segment right after "Metonym". | ||
| 684 | try testing.expectEqualStrings(" ", w_iter.next().?.bytes(word_str)); | ||
| 685 | const in_greek = w_iter.next().?; | ||
| 686 | for (in_greek.offset..in_greek.offset + in_greek.len) |i| { | ||
| 687 | const at_index = wb.wordAtIndex(word_str, i).bytes(word_str); | ||
| 688 | try testing.expectEqualStrings("Μετωνύμιο", at_index); | ||
| 689 | } | ||
| 690 | _ = w_iter.next(); | ||
| 691 | try testing.expectEqualStrings("メトニム", w_iter.next().?.bytes(word_str)); | ||
| 692 | } | ||
| 693 | |||
| 677 | test wordAtIndex { | 694 | test wordAtIndex { |
| 678 | const wb = try Words.init(testing.allocator); | 695 | const wb = try Words.init(testing.allocator); |
| 679 | defer wb.deinit(testing.allocator); | 696 | defer wb.deinit(testing.allocator); |
diff --git a/src/code_point.zig b/src/code_point.zig index 16648af..7a638af 100644 --- a/src/code_point.zig +++ b/src/code_point.zig | |||
| @@ -121,6 +121,9 @@ pub fn decodeAtCursor(bytes: []const u8, cursor: *uoffset) ?CodePoint { | |||
| 121 | } | 121 | } |
| 122 | if (st == RUNE_REJECT or cursor.* == bytes.len) { | 122 | if (st == RUNE_REJECT or cursor.* == bytes.len) { |
| 123 | @branchHint(.cold); | 123 | @branchHint(.cold); |
| 124 | // This, and the branch below, detect truncation, the | ||
| 125 | // only invalid state handled differently by the Maximal | ||
| 126 | // Subparts algorithm. | ||
| 124 | if (state_dfa[@intCast(u8dfa[byte])] == RUNE_REJECT) { | 127 | if (state_dfa[@intCast(u8dfa[byte])] == RUNE_REJECT) { |
| 125 | cursor.* -= 2; // +1 | 128 | cursor.* -= 2; // +1 |
| 126 | return .{ | 129 | return .{ |