diff options
| author | 2024-03-26 21:53:04 -0400 | |
|---|---|---|
| committer | 2024-03-26 21:53:04 -0400 | |
| commit | 2d7959f03575e637d56924c14e2a37b54368953e (patch) | |
| tree | f4b9858b4c7223921d043111f06ee2758ad3c724 /src | |
| parent | Using diff for lowercase mapping (diff) | |
| download | zg-2d7959f03575e637d56924c14e2a37b54368953e.tar.gz zg-2d7959f03575e637d56924c14e2a37b54368953e.tar.xz zg-2d7959f03575e637d56924c14e2a37b54368953e.zip | |
GraphemeData and Normalize non-pub fns
Diffstat (limited to 'src')
| -rw-r--r-- | src/Normalize.zig | 12 | ||||
| -rw-r--r-- | src/grapheme.zig | 14 |
2 files changed, 13 insertions, 13 deletions
diff --git a/src/Normalize.zig b/src/Normalize.zig
index b5a54d1..6ef7c90 100644
--- a/src/Normalize.zig
+++ b/src/Normalize.zig
| @@ -91,8 +91,8 @@ const Decomp = struct { | |||
| 91 | cps: []const u21 = &.{}, | 91 | cps: []const u21 = &.{}, |
| 92 | }; | 92 | }; |
| 93 | 93 | ||
| 94 | /// `mapping` retrieves the decomposition mapping for a code point as per the UCD. | 94 | // `mapping` retrieves the decomposition mapping for a code point as per the UCD. |
| 95 | pub fn mapping(self: Self, cp: u21, form: Form) Decomp { | 95 | fn mapping(self: Self, cp: u21, form: Form) Decomp { |
| 96 | var dc = Decomp{}; | 96 | var dc = Decomp{}; |
| 97 | 97 | ||
| 98 | switch (form) { | 98 | switch (form) { |
| @@ -117,8 +117,8 @@ pub fn mapping(self: Self, cp: u21, form: Form) Decomp { | |||
| 117 | return dc; | 117 | return dc; |
| 118 | } | 118 | } |
| 119 | 119 | ||
| 120 | /// `decompose` a code point to the specified normalization form, which should be either `.nfd` or `.nfkd`. | 120 | // `decompose` a code point to the specified normalization form, which should be either `.nfd` or `.nfkd`. |
| 121 | pub fn decompose( | 121 | fn decompose( |
| 122 | self: Self, | 122 | self: Self, |
| 123 | cp: u21, | 123 | cp: u21, |
| 124 | form: Form, | 124 | form: Form, |
| @@ -587,8 +587,8 @@ fn getTrailCcc(self: Self, cp: u21) u8 { | |||
| 587 | return self.norm_data.ccc_data.ccc(dcp); | 587 | return self.norm_data.ccc_data.ccc(dcp); |
| 588 | } | 588 | } |
| 589 | 589 | ||
| 590 | /// Fast check to detect if a string is already in NFC or NFD form. | 590 | // Fast check to detect if a string is already in NFC or NFD form. |
| 591 | pub fn isFcd(self: Self, str: []const u8) bool { | 591 | fn isFcd(self: Self, str: []const u8) bool { |
| 592 | var prev_ccc: u8 = 0; | 592 | var prev_ccc: u8 = 0; |
| 593 | var cp_iter = CodePointIterator{ .bytes = str }; | 593 | var cp_iter = CodePointIterator{ .bytes = str }; |
| 594 | 594 | ||
diff --git a/src/grapheme.zig b/src/grapheme.zig
index 7125b5b..e55a6a4 100644
--- a/src/grapheme.zig
+++ b/src/grapheme.zig
| @@ -4,7 +4,7 @@ const unicode = std.unicode; | |||
| 4 | 4 | ||
| 5 | const CodePoint = @import("code_point").CodePoint; | 5 | const CodePoint = @import("code_point").CodePoint; |
| 6 | const CodePointIterator = @import("code_point").Iterator; | 6 | const CodePointIterator = @import("code_point").Iterator; |
| 7 | pub const Data = @import("GraphemeData"); | 7 | pub const GraphemeData = @import("GraphemeData"); |
| 8 | 8 | ||
| 9 | /// `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes. | 9 | /// `Grapheme` represents a Unicode grapheme cluster by its length and offset in the source bytes. |
| 10 | pub const Grapheme = struct { | 10 | pub const Grapheme = struct { |
| @@ -22,12 +22,12 @@ pub const Grapheme = struct { | |||
| 22 | pub const Iterator = struct { | 22 | pub const Iterator = struct { |
| 23 | buf: [2]?CodePoint = .{ null, null }, | 23 | buf: [2]?CodePoint = .{ null, null }, |
| 24 | cp_iter: CodePointIterator, | 24 | cp_iter: CodePointIterator, |
| 25 | data: *Data, | 25 | data: *const GraphemeData, |
| 26 | 26 | ||
| 27 | const Self = @This(); | 27 | const Self = @This(); |
| 28 | 28 | ||
| 29 | /// Assumes `src` is valid UTF-8. | 29 | /// Assumes `src` is valid UTF-8. |
| 30 | pub fn init(str: []const u8, data: *Data) Self { | 30 | pub fn init(str: []const u8, data: *const GraphemeData) Self { |
| 31 | var self = Self{ .cp_iter = .{ .bytes = str }, .data = data }; | 31 | var self = Self{ .cp_iter = .{ .bytes = str }, .data = data }; |
| 32 | self.advance(); | 32 | self.advance(); |
| 33 | return self; | 33 | return self; |
| @@ -80,7 +80,7 @@ pub const Iterator = struct { | |||
| 80 | }; | 80 | }; |
| 81 | 81 | ||
| 82 | // Predicates | 82 | // Predicates |
| 83 | fn isBreaker(cp: u21, data: *Data) bool { | 83 | fn isBreaker(cp: u21, data: *const GraphemeData) bool { |
| 84 | // Extract relevant properties. | 84 | // Extract relevant properties. |
| 85 | const cp_gbp_prop = data.gbp(cp); | 85 | const cp_gbp_prop = data.gbp(cp); |
| 86 | return cp == '\x0d' or cp == '\x0a' or cp_gbp_prop == .Control; | 86 | return cp == '\x0d' or cp == '\x0a' or cp_gbp_prop == .Control; |
| @@ -133,7 +133,7 @@ const State = struct { | |||
| 133 | pub fn graphemeBreak( | 133 | pub fn graphemeBreak( |
| 134 | cp1: u21, | 134 | cp1: u21, |
| 135 | cp2: u21, | 135 | cp2: u21, |
| 136 | data: *Data, | 136 | data: *const GraphemeData, |
| 137 | state: *State, | 137 | state: *State, |
| 138 | ) bool { | 138 | ) bool { |
| 139 | // Extract relevant properties. | 139 | // Extract relevant properties. |
| @@ -237,7 +237,7 @@ test "Segmentation GraphemeIterator" { | |||
| 237 | var buf_reader = std.io.bufferedReader(file.reader()); | 237 | var buf_reader = std.io.bufferedReader(file.reader()); |
| 238 | var input_stream = buf_reader.reader(); | 238 | var input_stream = buf_reader.reader(); |
| 239 | 239 | ||
| 240 | var data = try Data.init(allocator); | 240 | var data = try GraphemeData.init(allocator); |
| 241 | defer data.deinit(); | 241 | defer data.deinit(); |
| 242 | 242 | ||
| 243 | var buf: [4096]u8 = undefined; | 243 | var buf: [4096]u8 = undefined; |
| @@ -302,7 +302,7 @@ test "Segmentation ZWJ and ZWSP emoji sequences" { | |||
| 302 | const with_zwsp = seq_1 ++ "\u{200B}" ++ seq_2; | 302 | const with_zwsp = seq_1 ++ "\u{200B}" ++ seq_2; |
| 303 | const no_joiner = seq_1 ++ seq_2; | 303 | const no_joiner = seq_1 ++ seq_2; |
| 304 | 304 | ||
| 305 | var data = try Data.init(std.testing.allocator); | 305 | var data = try GraphemeData.init(std.testing.allocator); |
| 306 | defer data.deinit(); | 306 | defer data.deinit(); |
| 307 | 307 | ||
| 308 | var iter = Iterator.init(with_zwj, &data); | 308 | var iter = Iterator.init(with_zwj, &data); |