const CodePointIterator = @import("code_point").Iterator;
const GeneralCategories = @import("GeneralCategories");

/// Two-stage lookup table holding the case properties of code points.
///
/// `s1` is indexed with the high bits of a code point (`cp >> 8`) and yields
/// an offset into `s2`; adding the low byte (`cp & 0xff`) to that offset
/// selects the `s2` entry for the code point.
///
/// Layout of an `s2` entry, as decoded by the functions in this file:
///   * mask `1`      - the code point has a lowercase mapping
///   * mask `2`      - the code point has an uppercase mapping
///   * bits 2..22    - the lowercase mapping (21 bits)
///   * bits 23..43   - the uppercase mapping (21 bits)
const Data = struct {
    // Deliberately no `undefined` defaults: both tables must be supplied, so
    // leaving one out is a compile error instead of a read of garbage memory.
    s1: []const u16,
    s2: []const u44,
};

/// The case tables, taken from the `case` module.
const letter_casing = letter_casing: {
    const data = @import("case");
    break :letter_casing Data{
        .s1 = &data.s1,
        .s2 = &data.s2,
    };
};

/// Returns true if `cp` is either upper, lower, or title case.
pub fn isCased(cp: u21) bool {
    // Title case has no flag in the case table, so it is detected through the
    // general category `Lt`.
    return isUpper(cp) or isLower(cp) or GeneralCategories.gc(cp) == .Lt;
}

/// Returns true if `cp` is uppercase.
///
/// NOTE(review): `cp >> 8` is used as an index without a range check, so this
/// presumably expects `cp` to lie within the range covered by the tables.
pub fn isUpper(cp: u21) bool {
    // isUpper is true if we have a mapping to a lower character (mask 1).
    return (letter_casing.s2[letter_casing.s1[cp >> 8] + (cp & 0xff)] & 1) == 1;
}

test "isUpper" {
    try testing.expect(isUpper('H'));
    try testing.expect(!isUpper('h'));
    try testing.expect(!isUpper('2'));
}

/// Returns true if `str` is all non-lowercase.
///
/// Code points without case (digits, punctuation, spaces) do not count
/// against the string; an empty string yields true.
pub fn isUpperStr(str: []const u8) bool {
    var iter = CodePointIterator{ .bytes = str };

    // The loop yields false at the first lowercase code point; the `else`
    // branch supplies true when the iterator runs out without finding one.
    return while (iter.next()) |cp| {
        if (isLower(cp.code)) break false;
    } else true;
}

test "isUpperStr" {
    try testing.expect(isUpperStr("HELLO, WORLD 2112!"));
    try testing.expect(!isUpperStr("hello, world 2112!"));
    try testing.expect(!isUpperStr("Hello, World 2112!"));
}

/// Returns uppercase mapping for `cp`.
///
/// Code points without an uppercase mapping are returned unchanged.
pub fn toUpper(cp: u21) u21 {
    const case_prop = letter_casing.s2[letter_casing.s1[cp >> 8] + (cp & 0xff)];

    if ((case_prop & 2) == 2) {
        // Skip the 2 flag bits and the 21-bit lowercase mapping; the 21 bits
        // that remain of the u44 are the uppercase mapping, so the cast to
        // u21 cannot lose information.
        return @intCast(case_prop >> (21 + 2));
    } else {
        return cp;
    }
}

test "toUpper" {
    try testing.expectEqual(@as(u21, 'E'), toUpper('e'));
    try testing.expectEqual(@as(u21, 'H'), toUpper('H'));
    try testing.expectEqual(@as(u21, '2'), toUpper('2'));
}

/// Returns a new string with all letters in uppercase.
/// Caller must free returned bytes with `allocator`.
pub fn toUpperStr(
    allocator: mem.Allocator,
    str: []const u8,
) ![]u8 {
    var out = std.array_list.Managed(u8).init(allocator);
    // Runs on every exit path and frees whatever the list still holds.
    defer out.deinit();

    // Scratch space for a single UTF-8 encoded code point.
    var encoded: [4]u8 = undefined;
    var code_points = CodePointIterator{ .bytes = str };
    while (code_points.next()) |cp| {
        const upper = toUpper(cp.code);
        const width = try unicode.utf8Encode(upper, &encoded);
        try out.appendSlice(encoded[0..width]);
    }

    return try out.toOwnedSlice();
}

test "toUpperStr" {
    const result = try toUpperStr(testing.allocator, "Hello, World 2112!");
    defer testing.allocator.free(result);

    try testing.expectEqualStrings("HELLO, WORLD 2112!", result);
}

/// Returns true if `cp` is lowercase.
pub fn isLower(cp: u21) bool {
    const block_offset = letter_casing.s1[cp >> 8];
    const case_prop = letter_casing.s2[block_offset + (cp & 0xff)];

    // A code point counts as lowercase when it has a mapping to an uppercase
    // character, which the table flags with mask 2.
    return (case_prop & 2) != 0;
}

/// Returns true if `str` is all non-uppercase.
///
/// Code points without case do not count against the string; an empty string
/// yields true.
pub fn isLowerStr(str: []const u8) bool {
    var code_points = CodePointIterator{ .bytes = str };
    while (code_points.next()) |cp| {
        // One uppercase code point is enough to reject the whole string.
        if (isUpper(cp.code)) return false;
    }
    return true;
}

test "isLowerStr" {
    try testing.expect(isLowerStr("hello, world 2112!"));

    for ([_][]const u8{ "HELLO, WORLD 2112!", "Hello, World 2112!" }) |not_lower| {
        try testing.expect(!isLowerStr(not_lower));
    }
}

/// Returns lowercase mapping for `cp`.
///
/// Code points without a lowercase mapping are returned unchanged.
pub fn toLower(cp: u21) u21 {
    const case_prop = letter_casing.s2[letter_casing.s1[cp >> 8] + (cp & 0xff)];

    // Mask 1 flags the presence of a lowercase mapping.
    if ((case_prop & 1) == 0) return cp;

    // Drop the 2 flag bits, then keep only the 21 bits of the lowercase
    // mapping; the mask guarantees the value fits in a u21.
    return @intCast((case_prop >> 2) & 0x1FFFFF);
}

/// Returns a new string with all letters in lowercase.
/// Caller must free returned bytes with `allocator`.
pub fn toLowerStr(
    allocator: mem.Allocator,
    str: []const u8,
) ![]u8 {
    var lowered = std.array_list.Managed(u8).init(allocator);
    // Runs on every exit path and frees whatever the list still holds.
    defer lowered.deinit();

    // Scratch space for a single UTF-8 encoded code point.
    var scratch: [4]u8 = undefined;
    var code_points = CodePointIterator{ .bytes = str };
    while (code_points.next()) |cp| {
        const lower = toLower(cp.code);
        const width = try unicode.utf8Encode(lower, &scratch);
        try lowered.appendSlice(scratch[0..width]);
    }

    return try lowered.toOwnedSlice();
}

test "toLowerStr" {
    const result = try toLowerStr(testing.allocator, "Hello, World 2112!");
    defer testing.allocator.free(result);

    try testing.expectEqualStrings("hello, world 2112!", result);
}

const std = @import("std");
const builtin = @import("builtin");
const mem = std.mem;
const testing = std.testing;
const unicode = std.unicode;