summary | refs | log | tree | commit | diff
path: root/src/LetterCasing.zig
diff options
context:
space:
mode:
author: Sam Atman, 2025-04-30 16:48:07 -0400
committer: Sam Atman, 2025-04-30 16:48:07 -0400
commit: d2d42bf3ef5490f6fdec73508c2493a666ecee41 (patch)
tree: 377794be59ece4118ca2449b705b8e7cc646abc0 /src/LetterCasing.zig
parent: Update README.md to new API (diff)
download: zg-d2d42bf3ef5490f6fdec73508c2493a666ecee41.tar.gz
zg-d2d42bf3ef5490f6fdec73508c2493a666ecee41.tar.xz
zg-d2d42bf3ef5490f6fdec73508c2493a666ecee41.zip
Setup variants for all allocating modules
This harmonizes the allocating modules in a couple of ways. All can now be constructed by pointer, and all treat various miscellaneous read failures as `unreachable`, which indeed they should be. The README has been updated to inform users of this option.
Diffstat (limited to 'src/LetterCasing.zig')
-rw-r--r-- src/LetterCasing.zig 62
1 files changed, 37 insertions, 25 deletions
diff --git a/src/LetterCasing.zig b/src/LetterCasing.zig
index 0a0acb1..a7260b8 100644
--- a/src/LetterCasing.zig
+++ b/src/LetterCasing.zig
@@ -1,25 +1,31 @@
1const std = @import("std");
2const builtin = @import("builtin");
3const compress = std.compress;
4const mem = std.mem;
5const testing = std.testing;
6const unicode = std.unicode;
7
8const CodePointIterator = @import("code_point").Iterator; 1const CodePointIterator = @import("code_point").Iterator;
9 2
10case_map: [][2]u21, 3case_map: [][2]u21 = undefined,
11prop_s1: []u16 = undefined, 4prop_s1: []u16 = undefined,
12prop_s2: []u8 = undefined, 5prop_s2: []u8 = undefined,
13 6
14const Self = @This(); 7const LetterCasing = @This();
8
9pub fn init(allocator: Allocator) Allocator.Error!LetterCasing {
10 var case = LetterCasing{};
11 try case.setup(allocator);
12 return case;
13}
15 14
16pub fn init(allocator: mem.Allocator) !Self { 15pub fn setup(case: *LetterCasing, allocator: Allocator) Allocator.Error!void {
16 case.setupInner(allocator) catch |err| {
17 switch (err) {
18 error.OutOfMemory => |e| return e,
19 else => unreachable,
20 }
21 };
22}
23
24inline fn setupInner(self: *LetterCasing, allocator: mem.Allocator) !void {
17 const decompressor = compress.flate.inflate.decompressor; 25 const decompressor = compress.flate.inflate.decompressor;
18 const endian = builtin.cpu.arch.endian(); 26 const endian = builtin.cpu.arch.endian();
19 27
20 var self = Self{ 28 self.case_map = try allocator.alloc([2]u21, 0x110000);
21 .case_map = try allocator.alloc([2]u21, 0x110000),
22 };
23 errdefer allocator.free(self.case_map); 29 errdefer allocator.free(self.case_map);
24 30
25 for (0..0x110000) |i| { 31 for (0..0x110000) |i| {
@@ -68,28 +74,26 @@ pub fn init(allocator: mem.Allocator) !Self {
68 self.prop_s2 = try allocator.alloc(u8, stage_2_len); 74 self.prop_s2 = try allocator.alloc(u8, stage_2_len);
69 errdefer allocator.free(self.prop_s2); 75 errdefer allocator.free(self.prop_s2);
70 _ = try cp_reader.readAll(self.prop_s2); 76 _ = try cp_reader.readAll(self.prop_s2);
71
72 return self;
73} 77}
74 78
75pub fn deinit(self: *const Self, allocator: mem.Allocator) void { 79pub fn deinit(self: *const LetterCasing, allocator: mem.Allocator) void {
76 allocator.free(self.case_map); 80 allocator.free(self.case_map);
77 allocator.free(self.prop_s1); 81 allocator.free(self.prop_s1);
78 allocator.free(self.prop_s2); 82 allocator.free(self.prop_s2);
79} 83}
80 84
81// Returns true if `cp` is either upper, lower, or title case. 85// Returns true if `cp` is either upper, lower, or title case.
82pub fn isCased(self: Self, cp: u21) bool { 86pub fn isCased(self: LetterCasing, cp: u21) bool {
83 return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 4 == 4; 87 return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 4 == 4;
84} 88}
85 89
86// Returns true if `cp` is uppercase. 90// Returns true if `cp` is uppercase.
87pub fn isUpper(self: Self, cp: u21) bool { 91pub fn isUpper(self: LetterCasing, cp: u21) bool {
88 return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 2 == 2; 92 return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 2 == 2;
89} 93}
90 94
91/// Returns true if `str` is all uppercase. 95/// Returns true if `str` is all uppercase.
92pub fn isUpperStr(self: Self, str: []const u8) bool { 96pub fn isUpperStr(self: LetterCasing, str: []const u8) bool {
93 var iter = CodePointIterator{ .bytes = str }; 97 var iter = CodePointIterator{ .bytes = str };
94 98
95 return while (iter.next()) |cp| { 99 return while (iter.next()) |cp| {
@@ -107,14 +111,14 @@ test "isUpperStr" {
107} 111}
108 112
109/// Returns uppercase mapping for `cp`. 113/// Returns uppercase mapping for `cp`.
110pub fn toUpper(self: Self, cp: u21) u21 { 114pub fn toUpper(self: LetterCasing, cp: u21) u21 {
111 return self.case_map[cp][0]; 115 return self.case_map[cp][0];
112} 116}
113 117
114/// Returns a new string with all letters in uppercase. 118/// Returns a new string with all letters in uppercase.
115/// Caller must free returned bytes with `allocator`. 119/// Caller must free returned bytes with `allocator`.
116pub fn toUpperStr( 120pub fn toUpperStr(
117 self: Self, 121 self: LetterCasing,
118 allocator: mem.Allocator, 122 allocator: mem.Allocator,
119 str: []const u8, 123 str: []const u8,
120) ![]u8 { 124) ![]u8 {
@@ -142,12 +146,12 @@ test "toUpperStr" {
142} 146}
143 147
144// Returns true if `cp` is lowercase. 148// Returns true if `cp` is lowercase.
145pub fn isLower(self: Self, cp: u21) bool { 149pub fn isLower(self: LetterCasing, cp: u21) bool {
146 return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 1 == 1; 150 return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)] & 1 == 1;
147} 151}
148 152
149/// Returns true if `str` is all lowercase. 153/// Returns true if `str` is all lowercase.
150pub fn isLowerStr(self: Self, str: []const u8) bool { 154pub fn isLowerStr(self: LetterCasing, str: []const u8) bool {
151 var iter = CodePointIterator{ .bytes = str }; 155 var iter = CodePointIterator{ .bytes = str };
152 156
153 return while (iter.next()) |cp| { 157 return while (iter.next()) |cp| {
@@ -165,14 +169,14 @@ test "isLowerStr" {
165} 169}
166 170
167/// Returns lowercase mapping for `cp`. 171/// Returns lowercase mapping for `cp`.
168pub fn toLower(self: Self, cp: u21) u21 { 172pub fn toLower(self: LetterCasing, cp: u21) u21 {
169 return self.case_map[cp][1]; 173 return self.case_map[cp][1];
170} 174}
171 175
172/// Returns a new string with all letters in lowercase. 176/// Returns a new string with all letters in lowercase.
173/// Caller must free returned bytes with `allocator`. 177/// Caller must free returned bytes with `allocator`.
174pub fn toLowerStr( 178pub fn toLowerStr(
175 self: Self, 179 self: LetterCasing,
176 allocator: mem.Allocator, 180 allocator: mem.Allocator,
177 str: []const u8, 181 str: []const u8,
178) ![]u8 { 182) ![]u8 {
@@ -198,3 +202,11 @@ test "toLowerStr" {
198 defer testing.allocator.free(lowered); 202 defer testing.allocator.free(lowered);
199 try testing.expectEqualStrings("hello, world 2112!", lowered); 203 try testing.expectEqualStrings("hello, world 2112!", lowered);
200} 204}
205
206const std = @import("std");
207const builtin = @import("builtin");
208const compress = std.compress;
209const mem = std.mem;
210const Allocator = std.mem.Allocator;
211const testing = std.testing;
212const unicode = std.unicode;