summaryrefslogtreecommitdiff
path: root/src/tg-types/message-entity.lisp
blob: fcabcce2131624731a2fb3f681beca0a328c30a3 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
;; SPDX-License-Identifier: EUPL-1.2
;; SPDX-FileCopyrightText: 2025 Uko Kokņevičs <perkontevs@gmail.com>
;; Package for the message-entity type.  It exports the type name, its
;; constructor/predicate/copier and slot accessor names, plus the parsing
;; helpers and MESSAGE-ENTITY-EXTRACT.
(defpackage :ukkoclot/tg-types/message-entity
  (:use :c2cl :iterate :ukkoclot/tg-types/macros :ukkoclot/tg-types/parsers :ukkoclot/tg-types/user)
  (:export
   message-entity
   make-message-entity
   message-entity-p
   copy-message-entity
   message-entity-type
   message-entity-offset
   message-entity-length
   message-entity-url
   message-entity-user
   message-entity-language
   message-entity-custom-emoji-id

   hash->message-entity
   message-entity-extract
   parse-message-entity-array))
(in-package :ukkoclot/tg-types/message-entity)

;; Description of a message entity, declared through DEFINE-TG-TYPE.
;; OFFSET and LENGTH are consumed by MESSAGE-ENTITY-EXTRACT in this file as
;; counts of UTF-16 code units, not as character indices.
;; NOTE(review): each slot spec appears to be (name type [default] [:parser fn]);
;; confirm against the DEFINE-TG-TYPE macro definition.
(define-tg-type message-entity
  (type keyword nil :parser tg-string->keyword)
  (offset integer)
  (length integer)
  (url (or string null) nil)
  (user (or user null) nil)
  (language (or string null) nil)
  (custom-emoji-id (or string null) nil))

;; Sanity check run when this file is loaded: the UTF-16 width arithmetic in
;; this file assumes one Lisp character per Unicode code point, i.e. that
;; CHAR-CODE-LIMIT is exactly #x110000.  Refuse to continue otherwise.
(when (/= char-code-limit #x110000)
  (error "Some UTF-16 fuckery assumes that system chars are UTF-32"))

(defun utf16-width (ch)
  "Return how many UTF-16 code units the character CH occupies.

Characters whose code is below #x10000 take one unit; all others take two
(a surrogate pair)."
  (if (<= #x10000 (char-code ch))
      2
      1))

(defun message-entity-extract (entity text)
  "Return the substring of TEXT covered by ENTITY.

ENTITY's OFFSET and LENGTH slots are treated as counts of UTF-16 code units,
while TEXT is indexed by whole characters, so both bounds are translated by
walking TEXT and summing `utf16-width' for each character.  A LENGTH of zero
yields the empty string.  Bounds that run past the end of TEXT are clamped to
(length text)."
  (let ((span16 (slot-value entity 'length)))
    (if (zerop span16)
        ""
        (let* ((offset16 (slot-value entity 'offset))
               ;; Character index of the first character that begins at, or
               ;; straddles, the UTF-16 position OFFSET16.
               (first-char
                 (iterate
                   (with units-seen = 0)
                   (for ch in-string text with-index char-idx)
                   (for width = (utf16-width ch))
                   (when (or (= units-seen offset16)
                             (< offset16 (+ units-seen width)))
                     (return char-idx))
                   (incf units-seen width)
                   (finally (return (length text)))))
               ;; Character index just past the last character needed to
               ;; cover SPAN16 UTF-16 units counted from FIRST-CHAR.
               (past-last
                 (iterate
                   (with units-taken = 0)
                   (for ch in-string text from first-char with-index char-idx)
                   (for width = (utf16-width ch))
                   (unless (< units-taken span16)
                     (return char-idx))
                   (incf units-taken width)
                   (finally (return (length text))))))
          (subseq text first-char past-last)))))