;; SPDX-License-Identifier: EUPL-1.2 ;; SPDX-FileCopyrightText: 2025 Uko Kokņevičs (defpackage :ukkoclot/src/tg/message-entity (:documentation "MessageEntity Telegram type") (:use :c2cl :iterate :ukkoclot/src/enum :ukkoclot/src/tg/type-macros :ukkoclot/src/tg/user) (:export #:message-entity-type #:mention #:hashtag #:cashtag #:bot-command #:url-entity #:email #:phone-number #:bold #:italic #:underline #:strikethrough #:spoiler #:blockquote #:expandable-blockquote #:code #:pre #:text-link #:text-mention #:custom-emoji #:message-entity #:make-message-entity #:message-entity-p #:copy-message-entity #:message-entity-offset #:message-entity-length #:message-entity-url #:message-entity-user #:message-entity-language #:message-entity-custom-emoji-id #:message-entity-extract)) (in-package :ukkoclot/src/tg/message-entity) (define-enum message-entity-type (mention "mention") (hashtag "hashtag") (cashtag "cashtag") (bot-command "bot_command") (url-entity "url") (email "email") (phone-number "phone_number") (bold "bold") (italic "italic") (underline "underline") (strikethrough "strikethrough") (spoiler "spoiler") (blockquote "blockquote") (expandable-blockquote "expandable_blockquote") (code "code") (pre "pre") (text-link "text_link") (text-mention "text_mention") (custom-emoji "custom_emoji")) (define-tg-type message-entity (type message-entity-type) (offset integer) (length integer) (url (or string null) nil) (user (or user null) nil) (language (or string null) nil) (custom-emoji-id (or string null) nil)) (unless (= char-code-limit #x110000) (error "Some UTF-16 fuckery assumes that system chars are UTF-32")) (defun utf16-width (ch) "Calculate the size of char in UTF-16 units." (declare (type character ch)) (if (< (char-code ch) #x10000) 1 2)) (defun message-entity-extract (entity text) "Extract the text corresponding to the ENTITY from the message text (in TEXT)." (check-type entity message-entity) (check-type text string) (with-slots (length offset) entity (if (zerop length) "" (let* ((start (iterate (with curr-idx16 = 0) (for ch in-string text with-index curr-idx32) (for curr-width = (utf16-width ch)) (when (or (= curr-idx16 offset) (> (+ curr-idx16 curr-width) offset)) (return curr-idx32)) (incf curr-idx16 curr-width) (finally (return (length text))))) (end (iterate (with curr-len16 = 0) (for ch in-string text from start with-index curr-idx32) (for curr-width = (utf16-width ch)) (when (>= curr-len16 length) (return curr-idx32)) (incf curr-len16 curr-width) (finally (return (length text)))))) (subseq text start end)))))