//! Case data for Unicode code points: single-code-point uppercase and
//! lowercase mappings plus the "cased", "uppercase" and "lowercase"
//! properties.
//!
//! `init` decompresses three embedded files ("upper", "lower" and
//! "case_prop") into heap tables; `deinit` releases them.

// Provenance: reconstructed from the patch "Rest of the Renamings"
// (commit 958c13ba442e7077a50d7163fdeb9bba378f95c2, Sam Atman, 2025-04-30),
// which deletes src/CaseData.zig; per the commit message the code was renamed
// but not otherwise changed.

const std = @import("std");
const builtin = @import("builtin");
const compress = std.compress;
const mem = std.mem;
const testing = std.testing;
const unicode = std.unicode;

const CodePointIterator = @import("code_point").Iterator;

/// Indexed by code point. Element `[0]` is the uppercase mapping and element
/// `[1]` the lowercase mapping; both default to the code point itself.
case_map: [][2]u21,
/// Stage 1 of the property table, indexed by `cp >> 8`. Each entry is added
/// to `cp & 0xff` to form an index into `prop_s2`.
prop_s1: []u16 = undefined,
/// Stage 2 of the property table. Each byte is a bit set: 1 = lowercase,
/// 2 = uppercase, 4 = cased.
prop_s2: []u8 = undefined,

const Self = @This();

/// Number of entries in `case_map`: one for every value in 0...0x10FFFF.
const code_point_count = 0x110000;

/// Columns of a `case_map` entry.
const upper_column = 0;
const lower_column = 1;

/// Bits of a `prop_s2` entry.
const lower_bit: u8 = 1;
const upper_bit: u8 = 2;
const cased_bit: u8 = 4;

/// Byte order used to decode the embedded tables.
// NOTE(review): this is the target's native byte order; presumably the data
// generator writes the files to match — confirm for cross-endian builds.
const data_endian = builtin.cpu.arch.endian();

/// Builds the case mapping and case property tables.
///
/// All memory is obtained from `allocator`; release it with `deinit`, passing
/// the same allocator. Fails with `error.OutOfMemory`, with any error reported
/// by the decompressor, or with `error.EndOfStream` if an embedded table is
/// shorter than its header claims. Nothing is leaked on failure.
pub fn init(allocator: mem.Allocator) !Self {
    var self = Self{
        .case_map = try allocator.alloc([2]u21, code_point_count),
    };
    errdefer allocator.free(self.case_map);

    // Every code point maps to itself until the tables below override it.
    for (self.case_map, 0..) |*entry, i| {
        const cp: u21 = @intCast(i);
        entry.* = .{ cp, cp };
    }

    try loadMappings(self.case_map, upper_column, @embedFile("upper"));
    try loadMappings(self.case_map, lower_column, @embedFile("lower"));

    // Case properties: a u16 length followed by that many u16 stage-1
    // entries, then a u16 length followed by that many stage-2 bytes.
    var prop_fbs = std.io.fixedBufferStream(@embedFile("case_prop"));
    var prop_decomp = compress.flate.inflate.decompressor(.raw, prop_fbs.reader());
    var prop_reader = prop_decomp.reader();

    const stage_1_len: u16 = try prop_reader.readInt(u16, data_endian);
    self.prop_s1 = try allocator.alloc(u16, stage_1_len);
    errdefer allocator.free(self.prop_s1);
    for (self.prop_s1) |*slot| slot.* = try prop_reader.readInt(u16, data_endian);

    const stage_2_len: u16 = try prop_reader.readInt(u16, data_endian);
    self.prop_s2 = try allocator.alloc(u8, stage_2_len);
    errdefer allocator.free(self.prop_s2);
    // Require the whole table: a short read would leave the tail of
    // `prop_s2` undefined and silently corrupt later property lookups.
    try prop_reader.readNoEof(self.prop_s2);

    return self;
}

/// Decompresses `compressed` (a raw deflate stream) and applies the mappings
/// it contains to column `column` of `case_map`.
///
/// The stream is a sequence of i24 pairs `(cp, diff)`, each setting the
/// mapping of `cp` to `cp + diff`, terminated by a single `cp` of 0.
fn loadMappings(case_map: [][2]u21, column: usize, compressed: []const u8) !void {
    var fbs = std.io.fixedBufferStream(compressed);
    var decomp = compress.flate.inflate.decompressor(.raw, fbs.reader());
    var reader = decomp.reader();

    while (true) {
        const cp = try reader.readInt(i24, data_endian);
        if (cp == 0) break;
        const diff = try reader.readInt(i24, data_endian);
        case_map[@intCast(cp)][column] = @intCast(cp + diff);
    }
}

/// Frees the three tables with `allocator`, which should be the allocator
/// that was passed to `init`.
pub fn deinit(self: *const Self, allocator: mem.Allocator) void {
    allocator.free(self.case_map);
    allocator.free(self.prop_s1);
    allocator.free(self.prop_s2);
}

/// Returns the property bit set for `cp` from the two-stage table.
fn properties(self: Self, cp: u21) u8 {
    return self.prop_s2[self.prop_s1[cp >> 8] + (cp & 0xff)];
}

/// Returns true if `cp` is either upper, lower, or title case.
pub fn isCased(self: Self, cp: u21) bool {
    return self.properties(cp) & cased_bit != 0;
}

/// Returns true if `cp` is uppercase.
pub fn isUpper(self: Self, cp: u21) bool {
    return self.properties(cp) & upper_bit != 0;
}

/// Returns true if `str` is all uppercase, i.e. it contains no cased code
/// point that is not uppercase. Uncased code points (digits, punctuation,
/// spaces) are ignored, so an empty string yields true.
pub fn isUpperStr(self: Self, str: []const u8) bool {
    var iter = CodePointIterator{ .bytes = str };

    while (iter.next()) |cp| {
        if (self.isCased(cp.code) and !self.isUpper(cp.code)) return false;
    }
    return true;
}

test "isUpperStr" {
    const cd = try init(testing.allocator);
    defer cd.deinit(testing.allocator);

    try testing.expect(cd.isUpperStr("HELLO, WORLD 2112!"));
    try testing.expect(!cd.isUpperStr("hello, world 2112!"));
    try testing.expect(!cd.isUpperStr("Hello, World 2112!"));
}

/// Returns uppercase mapping for `cp`.
/// `cp` indexes a table of 0x110000 entries, so it must not exceed 0x10FFFF.
pub fn toUpper(self: Self, cp: u21) u21 {
    return self.case_map[cp][upper_column];
}

/// Returns a new string in which every code point of `str` is replaced by
/// column `column` of its `case_map` entry, encoded as UTF-8.
/// Caller must free returned bytes with `allocator`.
fn mapStr(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
    column: usize,
) ![]u8 {
    var bytes = std.ArrayList(u8).init(allocator);
    // After a successful `toOwnedSlice` the list is empty, so this only
    // releases memory on the error paths.
    defer bytes.deinit();

    var iter = CodePointIterator{ .bytes = str };
    var buf: [4]u8 = undefined;

    while (iter.next()) |cp| {
        const len = try unicode.utf8Encode(self.case_map[cp.code][column], &buf);
        try bytes.appendSlice(buf[0..len]);
    }

    return try bytes.toOwnedSlice();
}

/// Returns a new string with all letters in uppercase.
/// Caller must free returned bytes with `allocator`.
pub fn toUpperStr(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
) ![]u8 {
    return self.mapStr(allocator, str, upper_column);
}

test "toUpperStr" {
    const cd = try init(testing.allocator);
    defer cd.deinit(testing.allocator);

    const uppered = try cd.toUpperStr(testing.allocator, "Hello, World 2112!");
    defer testing.allocator.free(uppered);
    try testing.expectEqualStrings("HELLO, WORLD 2112!", uppered);
}

/// Returns true if `cp` is lowercase.
pub fn isLower(self: Self, cp: u21) bool {
    return self.properties(cp) & lower_bit != 0;
}

/// Returns true if `str` is all lowercase, i.e. it contains no cased code
/// point that is not lowercase. Uncased code points (digits, punctuation,
/// spaces) are ignored, so an empty string yields true.
pub fn isLowerStr(self: Self, str: []const u8) bool {
    var iter = CodePointIterator{ .bytes = str };

    while (iter.next()) |cp| {
        if (self.isCased(cp.code) and !self.isLower(cp.code)) return false;
    }
    return true;
}

test "isLowerStr" {
    const cd = try init(testing.allocator);
    defer cd.deinit(testing.allocator);

    try testing.expect(cd.isLowerStr("hello, world 2112!"));
    try testing.expect(!cd.isLowerStr("HELLO, WORLD 2112!"));
    try testing.expect(!cd.isLowerStr("Hello, World 2112!"));
}

/// Returns lowercase mapping for `cp`.
/// `cp` indexes a table of 0x110000 entries, so it must not exceed 0x10FFFF.
pub fn toLower(self: Self, cp: u21) u21 {
    return self.case_map[cp][lower_column];
}

/// Returns a new string with all letters in lowercase.
/// Caller must free returned bytes with `allocator`.
pub fn toLowerStr(
    self: Self,
    allocator: mem.Allocator,
    str: []const u8,
) ![]u8 {
    return self.mapStr(allocator, str, lower_column);
}

test "toLowerStr" {
    const cd = try init(testing.allocator);
    defer cd.deinit(testing.allocator);

    const lowered = try cd.toLowerStr(testing.allocator, "Hello, World 2112!");
    defer testing.allocator.free(lowered);
    try testing.expectEqualStrings("hello, world 2112!", lowered);
}