summaryrefslogtreecommitdiff
path: root/src/grapheme.zig
diff options
context:
space:
mode:
authorGravatar Jose Colon Rodriguez2024-03-26 21:53:04 -0400
committerGravatar Jose Colon Rodriguez2024-03-26 21:53:04 -0400
commit2d7959f03575e637d56924c14e2a37b54368953e (patch)
treef4b9858b4c7223921d043111f06ee2758ad3c724 /src/grapheme.zig
parentUsing diff for lowercase mapping (diff)
downloadzg-2d7959f03575e637d56924c14e2a37b54368953e.tar.gz
zg-2d7959f03575e637d56924c14e2a37b54368953e.tar.xz
zg-2d7959f03575e637d56924c14e2a37b54368953e.zip
GraphemeData and Normalize non-pub fns
Diffstat (limited to 'src/grapheme.zig')
-rw-r--r--src/grapheme.zig14
1 files changed, 7 insertions, 7 deletions
diff --git a/src/grapheme.zig b/src/grapheme.zig
index 7125b5b..e55a6a4 100644
--- a/src/grapheme.zig
+++ b/src/grapheme.zig
@@ -4,7 +4,7 @@ const unicode = std.unicode;
4 4
5const CodePoint = @import("code_point").CodePoint; 5const CodePoint = @import("code_point").CodePoint;
6const CodePointIterator = @import("code_point").Iterator; 6const CodePointIterator = @import("code_point").Iterator;
7pub const Data = @import("GraphemeData"); 7pub const GraphemeData = @import("GraphemeData");
8 8
9/// `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes. 9/// `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes.
10pub const Grapheme = struct { 10pub const Grapheme = struct {
@@ -22,12 +22,12 @@ pub const Grapheme = struct {
22pub const Iterator = struct { 22pub const Iterator = struct {
23 buf: [2]?CodePoint = .{ null, null }, 23 buf: [2]?CodePoint = .{ null, null },
24 cp_iter: CodePointIterator, 24 cp_iter: CodePointIterator,
25 data: *Data, 25 data: *const GraphemeData,
26 26
27 const Self = @This(); 27 const Self = @This();
28 28
29 /// Assumes `src` is valid UTF-8. 29 /// Assumes `src` is valid UTF-8.
30 pub fn init(str: []const u8, data: *Data) Self { 30 pub fn init(str: []const u8, data: *const GraphemeData) Self {
31 var self = Self{ .cp_iter = .{ .bytes = str }, .data = data }; 31 var self = Self{ .cp_iter = .{ .bytes = str }, .data = data };
32 self.advance(); 32 self.advance();
33 return self; 33 return self;
@@ -80,7 +80,7 @@ pub const Iterator = struct {
80}; 80};
81 81
82// Predicates 82// Predicates
83fn isBreaker(cp: u21, data: *Data) bool { 83fn isBreaker(cp: u21, data: *const GraphemeData) bool {
84 // Extract relevant properties. 84 // Extract relevant properties.
85 const cp_gbp_prop = data.gbp(cp); 85 const cp_gbp_prop = data.gbp(cp);
86 return cp == '\x0d' or cp == '\x0a' or cp_gbp_prop == .Control; 86 return cp == '\x0d' or cp == '\x0a' or cp_gbp_prop == .Control;
@@ -133,7 +133,7 @@ const State = struct {
133pub fn graphemeBreak( 133pub fn graphemeBreak(
134 cp1: u21, 134 cp1: u21,
135 cp2: u21, 135 cp2: u21,
136 data: *Data, 136 data: *const GraphemeData,
137 state: *State, 137 state: *State,
138) bool { 138) bool {
139 // Extract relevant properties. 139 // Extract relevant properties.
@@ -237,7 +237,7 @@ test "Segmentation GraphemeIterator" {
237 var buf_reader = std.io.bufferedReader(file.reader()); 237 var buf_reader = std.io.bufferedReader(file.reader());
238 var input_stream = buf_reader.reader(); 238 var input_stream = buf_reader.reader();
239 239
240 var data = try Data.init(allocator); 240 var data = try GraphemeData.init(allocator);
241 defer data.deinit(); 241 defer data.deinit();
242 242
243 var buf: [4096]u8 = undefined; 243 var buf: [4096]u8 = undefined;
@@ -302,7 +302,7 @@ test "Segmentation ZWJ and ZWSP emoji sequences" {
302 const with_zwsp = seq_1 ++ "\u{200B}" ++ seq_2; 302 const with_zwsp = seq_1 ++ "\u{200B}" ++ seq_2;
303 const no_joiner = seq_1 ++ seq_2; 303 const no_joiner = seq_1 ++ seq_2;
304 304
305 var data = try Data.init(std.testing.allocator); 305 var data = try GraphemeData.init(std.testing.allocator);
306 defer data.deinit(); 306 defer data.deinit();
307 307
308 var iter = Iterator.init(with_zwj, &data); 308 var iter = Iterator.init(with_zwj, &data);