summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Words.zig42
-rw-r--r--src/unicode_tests.zig6
2 files changed, 24 insertions, 24 deletions
diff --git a/src/Words.zig b/src/Words.zig
index 6a532f5..565a2fb 100644
--- a/src/Words.zig
+++ b/src/Words.zig
@@ -25,15 +25,15 @@ const WordBreakProperty = enum(u5) {
25s1: []u16 = undefined, 25s1: []u16 = undefined,
26s2: []u5 = undefined, 26s2: []u5 = undefined,
27 27
28const WordBreak = @This(); 28const Words = @This();
29 29
30pub fn init(allocator: Allocator) Allocator.Error!WordBreak { 30pub fn init(allocator: Allocator) Allocator.Error!Words {
31 var wb: WordBreak = undefined; 31 var wb: Words = undefined;
32 try wb.setup(allocator); 32 try wb.setup(allocator);
33 return wb; 33 return wb;
34} 34}
35 35
36pub fn setup(wb: *WordBreak, allocator: Allocator) Allocator.Error!void { 36pub fn setup(wb: *Words, allocator: Allocator) Allocator.Error!void {
37 wb.setupImpl(allocator) catch |err| { 37 wb.setupImpl(allocator) catch |err| {
38 switch (err) { 38 switch (err) {
39 error.OutOfMemory => |e| return e, 39 error.OutOfMemory => |e| return e,
@@ -42,7 +42,7 @@ pub fn setup(wb: *WordBreak, allocator: Allocator) Allocator.Error!void {
42 }; 42 };
43} 43}
44 44
45pub fn deinit(wordbreak: *const WordBreak, allocator: mem.Allocator) void { 45pub fn deinit(wordbreak: *const Words, allocator: mem.Allocator) void {
46 allocator.free(wordbreak.s1); 46 allocator.free(wordbreak.s1);
47 allocator.free(wordbreak.s2); 47 allocator.free(wordbreak.s2);
48} 48}
@@ -60,19 +60,19 @@ pub const Word = struct {
60}; 60};
61 61
62/// Returns the word break property type for `cp`. 62/// Returns the word break property type for `cp`.
63pub fn breakProperty(wordbreak: *const WordBreak, cp: u21) WordBreakProperty { 63pub fn breakProperty(wordbreak: *const Words, cp: u21) WordBreakProperty {
64 return @enumFromInt(wordbreak.s2[wordbreak.s1[cp >> 8] + (cp & 0xff)]); 64 return @enumFromInt(wordbreak.s2[wordbreak.s1[cp >> 8] + (cp & 0xff)]);
65} 65}
66 66
67/// Convenience function for working with CodePoints 67/// Convenience function for working with CodePoints
68fn breakProp(wb: *const WordBreak, point: CodePoint) WordBreakProperty { 68fn breakProp(wb: *const Words, point: CodePoint) WordBreakProperty {
69 return @enumFromInt(wb.s2[wb.s1[point.code >> 8] + (point.code & 0xff)]); 69 return @enumFromInt(wb.s2[wb.s1[point.code >> 8] + (point.code & 0xff)]);
70} 70}
71 71
72/// Returns the Word at the given index. Asserts that the index is less than 72/// Returns the Word at the given index. Asserts that the index is less than
73/// `string.len`, and that the string is not empty. Always returns a word. 73/// `string.len`, and that the string is not empty. Always returns a word.
74/// The index does not have to be the start of a codepoint in the word. 74/// The index does not have to be the start of a codepoint in the word.
75pub fn wordAtIndex(wordbreak: *const WordBreak, string: []const u8, index: usize) Word { 75pub fn wordAtIndex(wordbreak: *const Words, string: []const u8, index: usize) Word {
76 assert(index < string.len and string.len > 0); 76 assert(index < string.len and string.len > 0);
77 var iter_back: ReverseIterator = initAtIndex(wordbreak, string, index); 77 var iter_back: ReverseIterator = initAtIndex(wordbreak, string, index);
78 const first_back = iter_back.prev(); 78 const first_back = iter_back.prev();
@@ -118,12 +118,12 @@ pub fn wordAtIndex(wordbreak: *const WordBreak, string: []const u8, index: usize
118} 118}
119 119
120/// Returns an iterator over words in `slice`. 120/// Returns an iterator over words in `slice`.
121pub fn iterator(wordbreak: *const WordBreak, slice: []const u8) Iterator { 121pub fn iterator(wordbreak: *const Words, slice: []const u8) Iterator {
122 return Iterator.init(wordbreak, slice); 122 return Iterator.init(wordbreak, slice);
123} 123}
124 124
125/// Returns a reverse iterator over the words in `slice`. 125/// Returns a reverse iterator over the words in `slice`.
126pub fn reverseIterator(wordbreak: *const WordBreak, slice: []const u8) ReverseIterator { 126pub fn reverseIterator(wordbreak: *const Words, slice: []const u8) ReverseIterator {
127 return ReverseIterator.init(wordbreak, slice); 127 return ReverseIterator.init(wordbreak, slice);
128} 128}
129 129
@@ -132,10 +132,10 @@ pub const Iterator = struct {
132 this: ?CodePoint = null, 132 this: ?CodePoint = null,
133 that: ?CodePoint = null, 133 that: ?CodePoint = null,
134 cp_iter: CodepointIterator, 134 cp_iter: CodepointIterator,
135 wb: *const WordBreak, 135 wb: *const Words,
136 136
137 /// Assumes `str` is valid UTF-8. 137 /// Assumes `str` is valid UTF-8.
138 pub fn init(wb: *const WordBreak, str: []const u8) Iterator { 138 pub fn init(wb: *const Words, str: []const u8) Iterator {
139 var wb_iter: Iterator = .{ .cp_iter = .init(str), .wb = wb }; 139 var wb_iter: Iterator = .{ .cp_iter = .init(str), .wb = wb };
140 wb_iter.advance(); 140 wb_iter.advance();
141 return wb_iter; 141 return wb_iter;
@@ -314,11 +314,11 @@ pub const ReverseIterator = struct {
314 after: ?CodePoint = null, 314 after: ?CodePoint = null,
315 before: ?CodePoint = null, 315 before: ?CodePoint = null,
316 cp_iter: ReverseCodepointIterator, 316 cp_iter: ReverseCodepointIterator,
317 wb: *const WordBreak, 317 wb: *const Words,
318 flags: usize = 0, 318 flags: usize = 0,
319 319
320 /// Assumes `str` is valid UTF-8. 320 /// Assumes `str` is valid UTF-8.
321 pub fn init(wb: *const WordBreak, str: []const u8) ReverseIterator { 321 pub fn init(wb: *const Words, str: []const u8) ReverseIterator {
322 var wb_iter: ReverseIterator = .{ .cp_iter = .init(str), .wb = wb }; 322 var wb_iter: ReverseIterator = .{ .cp_iter = .init(str), .wb = wb };
323 wb_iter.advance(); 323 wb_iter.advance();
324 return wb_iter; 324 return wb_iter;
@@ -511,7 +511,7 @@ pub const ReverseIterator = struct {
511//| Implementation Details 511//| Implementation Details
512 512
513/// Initialize a ReverseIterator at the provided index. Used in `wordAtIndex`. 513/// Initialize a ReverseIterator at the provided index. Used in `wordAtIndex`.
514fn initAtIndex(wb: *const WordBreak, string: []const u8, index: usize) ReverseIterator { 514fn initAtIndex(wb: *const Words, string: []const u8, index: usize) ReverseIterator {
515 var idx: u32 = @intCast(index); 515 var idx: u32 = @intCast(index);
516 // Find the next lead byte: 516 // Find the next lead byte:
517 while (idx < string.len and 0x80 <= string[idx] and string[idx] <= 0xBf) : (idx += 1) {} 517 while (idx < string.len and 0x80 <= string[idx] and string[idx] <= 0xBf) : (idx += 1) {}
@@ -536,7 +536,7 @@ fn sneaky(iter: *const ReverseIterator) SneakIterator {
536 536
537const SneakIterator = struct { 537const SneakIterator = struct {
538 cp_iter: ReverseCodepointIterator, 538 cp_iter: ReverseCodepointIterator,
539 wb: *const WordBreak, 539 wb: *const Words,
540 540
541 fn peek(iter: *SneakIterator) ?CodePoint { 541 fn peek(iter: *SneakIterator) ?CodePoint {
542 const save_cp = iter.cp_iter; 542 const save_cp = iter.cp_iter;
@@ -570,7 +570,7 @@ const SneakIterator = struct {
570 } 570 }
571}; 571};
572 572
573inline fn setupImpl(wb: *WordBreak, allocator: Allocator) !void { 573inline fn setupImpl(wb: *Words, allocator: Allocator) !void {
574 const decompressor = compress.flate.inflate.decompressor; 574 const decompressor = compress.flate.inflate.decompressor;
575 const in_bytes = @embedFile("wbp"); 575 const in_bytes = @embedFile("wbp");
576 var in_fbs = std.io.fixedBufferStream(in_bytes); 576 var in_fbs = std.io.fixedBufferStream(in_bytes);
@@ -627,7 +627,7 @@ inline fn isExtensible(wbp: WordBreakProperty) bool {
627} 627}
628 628
629test "Word Break Properties" { 629test "Word Break Properties" {
630 const wb = try WordBreak.init(testing.allocator); 630 const wb = try Words.init(testing.allocator);
631 defer wb.deinit(testing.allocator); 631 defer wb.deinit(testing.allocator);
632 try testing.expectEqual(.CR, wb.breakProperty('\r')); 632 try testing.expectEqual(.CR, wb.breakProperty('\r'));
633 try testing.expectEqual(.LF, wb.breakProperty('\n')); 633 try testing.expectEqual(.LF, wb.breakProperty('\n'));
@@ -641,7 +641,7 @@ test "ext_pict" {
641} 641}
642 642
643test wordAtIndex { 643test wordAtIndex {
644 const wb = try WordBreak.init(testing.allocator); 644 const wb = try Words.init(testing.allocator);
645 defer wb.deinit(testing.allocator); 645 defer wb.deinit(testing.allocator);
646 const t_string = "first second third"; 646 const t_string = "first second third";
647 const second = wb.wordAtIndex(t_string, 8); 647 const second = wb.wordAtIndex(t_string, 8);
@@ -663,7 +663,7 @@ test wordAtIndex {
663const testr = "don't a:ka fin!"; 663const testr = "don't a:ka fin!";
664 664
665test "reversal" { 665test "reversal" {
666 const wb = try WordBreak.init(testing.allocator); 666 const wb = try Words.init(testing.allocator);
667 defer wb.deinit(testing.allocator); 667 defer wb.deinit(testing.allocator);
668 { 668 {
669 var fwd = wb.iterator(testr); 669 var fwd = wb.iterator(testr);
@@ -696,7 +696,7 @@ test "reversal" {
696} 696}
697 697
698fn testAllocations(allocator: Allocator) !void { 698fn testAllocations(allocator: Allocator) !void {
699 const wb = try WordBreak.init(allocator); 699 const wb = try Words.init(allocator);
700 wb.deinit(allocator); 700 wb.deinit(allocator);
701} 701}
702 702
diff --git a/src/unicode_tests.zig b/src/unicode_tests.zig
index 7139d4c..18f1814 100644
--- a/src/unicode_tests.zig
+++ b/src/unicode_tests.zig
@@ -211,7 +211,7 @@ test "Segmentation Word Iterator" {
211 var buf_reader = std.io.bufferedReader(file.reader()); 211 var buf_reader = std.io.bufferedReader(file.reader());
212 var input_stream = buf_reader.reader(); 212 var input_stream = buf_reader.reader();
213 213
214 const wb = try WordBreak.init(allocator); 214 const wb = try Words.init(allocator);
215 defer wb.deinit(allocator); 215 defer wb.deinit(allocator);
216 216
217 var buf: [4096]u8 = undefined; 217 var buf: [4096]u8 = undefined;
@@ -392,5 +392,5 @@ const Graphemes = @import("Graphemes");
392const GraphemeIterator = @import("Graphemes").Iterator; 392const GraphemeIterator = @import("Graphemes").Iterator;
393const Normalize = @import("Normalize"); 393const Normalize = @import("Normalize");
394 394
395const WordBreak = @import("WordBreak"); 395const Words = @import("Words");
396const Word = WordBreak.Word; 396const Word = Words.Word;