From 34b7dc964b3516457f04d00d3ec910d3b6fd585b Mon Sep 17 00:00:00 2001
From: Uko Kokņevičs
Date: Thu, 29 Aug 2024 20:41:04 +0800
Subject: thank you Q&A, now theres proper unicode support n shit

---
 src/utils.zig | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)

(limited to 'src/utils.zig')

diff --git a/src/utils.zig b/src/utils.zig
index c6e8508..631e464 100644
--- a/src/utils.zig
+++ b/src/utils.zig
@@ -2,6 +2,9 @@ const std = @import("std");
 
 const Allocator = std.mem.Allocator;
 const ArrayList = std.ArrayList;
+const CaseData = @import("CaseData");
+const GenCatData = @import("GenCatData");
+const Utf8View = std.unicode.Utf8View;
 
 pub fn escapeXml(writer: anytype, text: []const u8) !void {
     for (text) |ch| {
@@ -15,6 +18,34 @@ pub fn escapeXml(writer: anytype, text: []const u8) !void {
     }
 }
 
+/// Lazily initialized `GenCatData` shared by all callers of `getGCD`.
+var gcd_global: ?GenCatData = null;
+
+/// Returns the shared `GenCatData`, creating it with `std.heap.page_allocator`
+/// on the first call. If `GenCatData.init` fails the error is returned and
+/// nothing is cached. Access to the cache is not synchronized here.
+pub fn getGCD() !GenCatData {
+    if (gcd_global) |gcd| {
+        return gcd;
+    }
+    gcd_global = try GenCatData.init(std.heap.page_allocator);
+    return gcd_global.?;
+}
+
+/// Lazily initialized `CaseData` shared by all callers of `getCD`.
+var cd_global: ?CaseData = null;
+
+/// Returns the shared `CaseData`, creating it with `std.heap.page_allocator`
+/// on the first call. If `CaseData.init` fails the error is returned and
+/// nothing is cached. Access to the cache is not synchronized here.
+pub fn getCD() !CaseData {
+    if (cd_global) |cd| {
+        return cd;
+    }
+    cd_global = try CaseData.init(std.heap.page_allocator);
+    return cd_global.?;
+}
+
 pub inline fn isNull(value: anytype) bool {
     return switch (@typeInfo(@TypeOf(value))) {
         .Null => true,
@@ -22,3 +53,46 @@ pub inline fn isNull(value: anytype) bool {
         else => false,
     };
 }
+
+/// Returns the sub-slice of `str` that remains after dropping leading and
+/// trailing codepoints for which `isTrimmable` is true.
+///
+/// The result borrows from `str`, except that an input consisting only of
+/// trimmable codepoints (or an empty input) yields the literal `""`.
+/// Fails if `str` is not valid UTF-8 or if `getGCD` fails.
+pub fn trim(str: []const u8) ![]const u8 {
+    const view = try Utf8View.init(str);
+    const gcd = try getGCD();
+
+    var it = view.iterator();
+
+    // Walk forward to the first codepoint that has to be kept, remembering
+    // the byte offset at which it starts.
+    var idx: usize = 0;
+    const first = while (it.nextCodepoint()) |cp| {
+        if (!isTrimmable(gcd, cp)) {
+            break idx;
+        }
+        idx = it.i;
+    } else {
+        return "";
+    };
+
+    // `it.i` is now one past the last byte of that first kept codepoint.
+    // Starting the end bound here (rather than at `first + 1`) keeps a
+    // multi-byte codepoint intact when nothing else after it is kept.
+    var end = it.i;
+    while (it.nextCodepoint()) |cp| {
+        if (!isTrimmable(gcd, cp)) {
+            end = it.i;
+        }
+    }
+
+    return str[first..end];
+}
+
+/// Reports whether `cp` should be stripped by `trim`: true when `gcd`
+/// classifies it as a separator or a control character.
+inline fn isTrimmable(gcd: GenCatData, cp: u21) bool {
+    return gcd.isSeparator(cp) or gcd.isControl(cp);
+}
-- 
cgit v1.2.3