From 34b7dc964b3516457f04d00d3ec910d3b6fd585b Mon Sep 17 00:00:00 2001
From: Uko Kokņevičs
Date: Thu, 29 Aug 2024 20:41:04 +0800
Subject: thank you Q&A, now theres proper unicode support n shit

---
 src/main.zig  | 50 ++++++++++++++++++++++++++++++--------------------
 src/utils.zig | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 94 insertions(+), 20 deletions(-)

(limited to 'src')

diff --git a/src/main.zig b/src/main.zig
index 544b105..2e33cfc 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -199,22 +199,29 @@ fn onTextMessage(bot: *Bot, msg: types.Message, text: []const u8) !void {
             },
         });
     } else if (std.ascii.startsWithIgnoreCase(text, "big ")) {
-        var output = ArrayList(u8).init(bot.allocator);
-        defer output.deinit();
+        const trimmed = try utils.trim(text[4..]);
+        const cd = try utils.getCD();
+        if (trimmed.len > 0) {
+            const uppercased = try cd.toUpperStr(bot.allocator, trimmed);
+            defer bot.allocator.free(uppercased);
 
-        try output.appendSlice("");
-        try utils.escapeXml(output.writer(), text[4..]);
-        try output.appendSlice("");
+            var output = ArrayList(u8).init(bot.allocator);
+            defer output.deinit();
 
-        try bot.sendMessage_(.{
-            .chat_id = msg.chat.id,
-            .text = output.items,
-            .parse_mode = .html,
-            .reply_parameters = .{
-                .message_id = msg.message_id,
+            try output.appendSlice("");
+            try utils.escapeXml(output.writer(), uppercased);
+            try output.appendSlice("");
+
+            try bot.sendMessage_(.{
                 .chat_id = msg.chat.id,
-            },
-        });
+                .text = output.items,
+                .parse_mode = .html,
+                .reply_parameters = .{
+                    .message_id = msg.message_id,
+                    .chat_id = msg.chat.id,
+                },
+            });
+        }
     } else if (std.ascii.eqlIgnoreCase(text, "forgor")) {
         try bot.sendMessage_(.{
             .chat_id = msg.chat.id,
@@ -244,14 +251,17 @@ fn onTextMessage(bot: *Bot, msg: types.Message, text: []const u8) !void {
             },
         });
     } else if (std.ascii.startsWithIgnoreCase(text, "say ")) {
-        try bot.sendMessage_(.{
-            .chat_id = msg.chat.id,
-            .text = text[4..],
-            .reply_parameters = .{
-                .message_id = msg.message_id,
+        const trimmed = try utils.trim(text[4..]);
+        if (trimmed.len > 0) {
+            try bot.sendMessage_(.{
                 .chat_id = msg.chat.id,
-            },
-        });
+                .text = trimmed,
+                .reply_parameters = .{
+                    .message_id = msg.message_id,
+                    .chat_id = msg.chat.id,
+                },
+            });
+        }
     } else if (std.ascii.eqlIgnoreCase(text, "uwu")) {
         try bot.sendMessage_(.{
             .chat_id = msg.chat.id,
diff --git a/src/utils.zig b/src/utils.zig
index c6e8508..631e464 100644
--- a/src/utils.zig
+++ b/src/utils.zig
@@ -2,6 +2,9 @@ const std = @import("std");
 
 const Allocator = std.mem.Allocator;
 const ArrayList = std.ArrayList;
+const CaseData = @import("CaseData");
+const GenCatData = @import("GenCatData");
+const Utf8View = std.unicode.Utf8View;
 
 pub fn escapeXml(writer: anytype, text: []const u8) !void {
     for (text) |ch| {
@@ -15,6 +18,32 @@ pub fn escapeXml(writer: anytype, text: []const u8) !void {
     }
 }
 
+/// Module-level cache for the general-category tables; filled by `getGCD`.
+var gcd_global: ?GenCatData = null;
+
+/// Returns the cached `GenCatData`, building it with `page_allocator` on the
+/// first call. No lock guards the cache, and nothing here frees the tables.
+pub fn getGCD() !GenCatData {
+    if (gcd_global) |gcd| {
+        return gcd;
+    }
+    gcd_global = try GenCatData.init(std.heap.page_allocator);
+    return gcd_global.?;
+}
+
+/// Module-level cache for the case-mapping tables; filled by `getCD`.
+var cd_global: ?CaseData = null;
+
+/// Returns the cached `CaseData`, building it with `page_allocator` on the
+/// first call. No lock guards the cache, and nothing here frees the tables.
+pub fn getCD() !CaseData {
+    if (cd_global) |cd| {
+        return cd;
+    }
+    cd_global = try CaseData.init(std.heap.page_allocator);
+    return cd_global.?;
+}
+
 pub inline fn isNull(value: anytype) bool {
     return switch (@typeInfo(@TypeOf(value))) {
         .Null => true,
@@ -22,3 +51,38 @@ pub inline fn isNull(value: anytype) bool {
         else => false,
     };
 }
+
+/// Strips leading and trailing trimmable codepoints (see `isTrimmable`) from `str`.
+/// Returns a sub-slice of `str` without allocating, or "" when nothing is left.
+/// Fails if `str` is not valid UTF-8 or if `getGCD` fails.
+pub fn trim(str: []const u8) ![]const u8 {
+    const view = try Utf8View.init(str);
+    const gcd = try getGCD();
+
+    var it = view.iterator();
+    var idx: usize = 0;
+    const first = while (it.nextCodepoint()) |cp| {
+        if (!isTrimmable(gcd, cp)) {
+            break idx;
+        }
+        idx = it.i;
+    } else {
+        return "";
+    };
+
+    // `it.i` is now one past the first kept codepoint. Seeding `end` from it
+    // keeps a lone multi-byte codepoint whole instead of only its first byte.
+    var end = it.i;
+    while (it.nextCodepoint()) |cp| {
+        if (!isTrimmable(gcd, cp)) {
+            end = it.i;
+        }
+    }
+
+    return str[first..end];
+}
+
+/// True when `gcd` classifies `cp` as a separator or a control codepoint.
+inline fn isTrimmable(gcd: GenCatData, cp: u21) bool {
+    return gcd.isSeparator(cp) or gcd.isControl(cp);
+}
-- 
cgit v1.2.3