summaryrefslogtreecommitdiff
path: root/src/tg/message-entity.lisp
blob: c87dca04ba7f5a918a9f8fdc49875afba5ee0899 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
;; SPDX-License-Identifier: EUPL-1.2
;; SPDX-FileCopyrightText: 2025 Uko Kokņevičs <perkontevs@gmail.com>
(defpackage :ukkoclot/src/tg/message-entity
  (:documentation "MessageEntity Telegram type")
  (:use :c2cl :iterate :ukkoclot/src/enum :ukkoclot/src/tg/type-macros :ukkoclot/src/tg/user)
  (:import-from :serapeum :->)
  (:export
   #:message-entity-type
   #:mention
   #:hashtag
   #:cashtag
   #:bot-command
   #:url-entity
   #:email
   #:phone-number
   #:bold
   #:italic
   #:underline
   #:strikethrough
   #:spoiler
   #:blockquote
   #:expandable-blockquote
   #:code
   #:pre
   #:text-link
   #:text-mention
   #:custom-emoji

   #:message-entity
   #:make-message-entity
   #:message-entity-p
   #:copy-message-entity
   #:message-entity-offset
   #:message-entity-length
   #:message-entity-url
   #:message-entity-user
   #:message-entity-language
   #:message-entity-custom-emoji-id

   #:message-entity-extract))
(in-package :ukkoclot/src/tg/message-entity)

(define-enum message-entity-type
  (mention "mention")
  (hashtag "hashtag")
  (cashtag "cashtag")
  (bot-command "bot_command")
  (url-entity "url")
  (email "email")
  (phone-number "phone_number")
  (bold "bold")
  (italic "italic")
  (underline "underline")
  (strikethrough "strikethrough")
  (spoiler "spoiler")
  (blockquote "blockquote")
  (expandable-blockquote "expandable_blockquote")
  (code "code")
  (pre "pre")
  (text-link "text_link")
  (text-mention "text_mention")
  (custom-emoji "custom_emoji"))

(define-tg-type message-entity
  (type message-entity-type)
  (offset integer)
  (length integer)
  (url (or string null) nil)
  (user (or user null) nil)
  (language (or string null) nil)
  (custom-emoji-id (or string null) nil))

(unless (= char-code-limit #x110000)
  (error "Some UTF-16 fuckery assumes that system chars are UTF-32"))

(-> utf16-width (character) (member 1 2))
(defun utf16-width (ch)
  "Calculate the size of char in UTF-16 units."
  (declare (type character ch))
  (if (< (char-code ch) #x10000)
      1
      2))

(-> message-entity-extract (message-entity string) string)
(defun message-entity-extract (entity text)
  "Extract the text corresponding to the ENTITY from the message text (in TEXT)."
  (check-type entity message-entity)
  (check-type text string)
  (with-slots (length offset) entity
    (if (zerop length)
        ""
        (let* ((start (iterate
                        (with curr-idx16 = 0)
                        (for ch in-string text with-index curr-idx32)
                        (for curr-width = (utf16-width ch))
                        (when (or (= curr-idx16 offset)
                                  (> (+ curr-idx16 curr-width) offset))
                          (return curr-idx32))
                        (incf curr-idx16 curr-width)
                        (finally (return (length text)))))
               (end (iterate
                      (with curr-len16 = 0)
                      (for ch in-string text from start with-index curr-idx32)
                      (for curr-width = (utf16-width ch))
                      (when (>= curr-len16 length)
                        (return curr-idx32))
                      (incf curr-len16 curr-width)
                      (finally (return (length text))))))
          (subseq text start end)))))