summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Jose Colon Rodriguez, 2024-03-26 21:53:04 -0400
committer: Jose Colon Rodriguez, 2024-03-26 21:53:04 -0400
commit: 2d7959f03575e637d56924c14e2a37b54368953e (patch)
tree: f4b9858b4c7223921d043111f06ee2758ad3c724 /src
parent: Using diff for lowercase mapping (diff)
download: zg-2d7959f03575e637d56924c14e2a37b54368953e.tar.gz
zg-2d7959f03575e637d56924c14e2a37b54368953e.tar.xz
zg-2d7959f03575e637d56924c14e2a37b54368953e.zip
GraphemeData and Normalize non-pub fns
Diffstat (limited to 'src')
-rw-r--r-- src/Normalize.zig 12
-rw-r--r-- src/grapheme.zig 14
2 files changed, 13 insertions, 13 deletions
diff --git a/src/Normalize.zig b/src/Normalize.zig
index b5a54d1..6ef7c90 100644
--- a/src/Normalize.zig
+++ b/src/Normalize.zig
@@ -91,8 +91,8 @@ const Decomp = struct {
91 cps: []const u21 = &.{}, 91 cps: []const u21 = &.{},
92}; 92};
93 93
94/// `mapping` retrieves the decomposition mapping for a code point as per the UCD. 94// `mapping` retrieves the decomposition mapping for a code point as per the UCD.
95pub fn mapping(self: Self, cp: u21, form: Form) Decomp { 95fn mapping(self: Self, cp: u21, form: Form) Decomp {
96 var dc = Decomp{}; 96 var dc = Decomp{};
97 97
98 switch (form) { 98 switch (form) {
@@ -117,8 +117,8 @@ pub fn mapping(self: Self, cp: u21, form: Form) Decomp {
117 return dc; 117 return dc;
118} 118}
119 119
120/// `decompose` a code point to the specified normalization form, which should be either `.nfd` or `.nfkd`. 120// `decompose` a code point to the specified normalization form, which should be either `.nfd` or `.nfkd`.
121pub fn decompose( 121fn decompose(
122 self: Self, 122 self: Self,
123 cp: u21, 123 cp: u21,
124 form: Form, 124 form: Form,
@@ -587,8 +587,8 @@ fn getTrailCcc(self: Self, cp: u21) u8 {
587 return self.norm_data.ccc_data.ccc(dcp); 587 return self.norm_data.ccc_data.ccc(dcp);
588} 588}
589 589
590/// Fast check to detect if a string is already in NFC or NFD form. 590// Fast check to detect if a string is already in NFC or NFD form.
591pub fn isFcd(self: Self, str: []const u8) bool { 591fn isFcd(self: Self, str: []const u8) bool {
592 var prev_ccc: u8 = 0; 592 var prev_ccc: u8 = 0;
593 var cp_iter = CodePointIterator{ .bytes = str }; 593 var cp_iter = CodePointIterator{ .bytes = str };
594 594
diff --git a/src/grapheme.zig b/src/grapheme.zig
index 7125b5b..e55a6a4 100644
--- a/src/grapheme.zig
+++ b/src/grapheme.zig
@@ -4,7 +4,7 @@ const unicode = std.unicode;
4 4
5const CodePoint = @import("code_point").CodePoint; 5const CodePoint = @import("code_point").CodePoint;
6const CodePointIterator = @import("code_point").Iterator; 6const CodePointIterator = @import("code_point").Iterator;
7pub const Data = @import("GraphemeData"); 7pub const GraphemeData = @import("GraphemeData");
8 8
9/// `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes. 9/// `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes.
10pub const Grapheme = struct { 10pub const Grapheme = struct {
@@ -22,12 +22,12 @@ pub const Grapheme = struct {
22pub const Iterator = struct { 22pub const Iterator = struct {
23 buf: [2]?CodePoint = .{ null, null }, 23 buf: [2]?CodePoint = .{ null, null },
24 cp_iter: CodePointIterator, 24 cp_iter: CodePointIterator,
25 data: *Data, 25 data: *const GraphemeData,
26 26
27 const Self = @This(); 27 const Self = @This();
28 28
29 /// Assumes `src` is valid UTF-8. 29 /// Assumes `src` is valid UTF-8.
30 pub fn init(str: []const u8, data: *Data) Self { 30 pub fn init(str: []const u8, data: *const GraphemeData) Self {
31 var self = Self{ .cp_iter = .{ .bytes = str }, .data = data }; 31 var self = Self{ .cp_iter = .{ .bytes = str }, .data = data };
32 self.advance(); 32 self.advance();
33 return self; 33 return self;
@@ -80,7 +80,7 @@ pub const Iterator = struct {
80}; 80};
81 81
82// Predicates 82// Predicates
83fn isBreaker(cp: u21, data: *Data) bool { 83fn isBreaker(cp: u21, data: *const GraphemeData) bool {
84 // Extract relevant properties. 84 // Extract relevant properties.
85 const cp_gbp_prop = data.gbp(cp); 85 const cp_gbp_prop = data.gbp(cp);
86 return cp == '\x0d' or cp == '\x0a' or cp_gbp_prop == .Control; 86 return cp == '\x0d' or cp == '\x0a' or cp_gbp_prop == .Control;
@@ -133,7 +133,7 @@ const State = struct {
133pub fn graphemeBreak( 133pub fn graphemeBreak(
134 cp1: u21, 134 cp1: u21,
135 cp2: u21, 135 cp2: u21,
136 data: *Data, 136 data: *const GraphemeData,
137 state: *State, 137 state: *State,
138) bool { 138) bool {
139 // Extract relevant properties. 139 // Extract relevant properties.
@@ -237,7 +237,7 @@ test "Segmentation GraphemeIterator" {
237 var buf_reader = std.io.bufferedReader(file.reader()); 237 var buf_reader = std.io.bufferedReader(file.reader());
238 var input_stream = buf_reader.reader(); 238 var input_stream = buf_reader.reader();
239 239
240 var data = try Data.init(allocator); 240 var data = try GraphemeData.init(allocator);
241 defer data.deinit(); 241 defer data.deinit();
242 242
243 var buf: [4096]u8 = undefined; 243 var buf: [4096]u8 = undefined;
@@ -302,7 +302,7 @@ test "Segmentation ZWJ and ZWSP emoji sequences" {
302 const with_zwsp = seq_1 ++ "\u{200B}" ++ seq_2; 302 const with_zwsp = seq_1 ++ "\u{200B}" ++ seq_2;
303 const no_joiner = seq_1 ++ seq_2; 303 const no_joiner = seq_1 ++ seq_2;
304 304
305 var data = try Data.init(std.testing.allocator); 305 var data = try GraphemeData.init(std.testing.allocator);
306 defer data.deinit(); 306 defer data.deinit();
307 307
308 var iter = Iterator.init(with_zwj, &data); 308 var iter = Iterator.init(with_zwj, &data);