summaryrefslogtreecommitdiff
path: root/src/tg/message-entity.lisp
blob: 3413763808c6d2d0818ada33017d49e0455a91c8 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
;; SPDX-License-Identifier: EUPL-1.2
;; SPDX-FileCopyrightText: 2025 Uko Kokņevičs <perkontevs@gmail.com>
;; Package holding the `message-entity' type and the
;; `message-entity-extract' helper that slices an entity out of a text.
(defpackage :ukkoclot/tg/message-entity
  (:use :c2cl :iterate :ukkoclot/tg/type-macros :ukkoclot/tg/message-entity-type :ukkoclot/tg/user)
  (:export
   ;; Type name plus constructor/predicate/copier/accessor names.
   ;; NOTE(review): presumably all generated by `define-tg-type' below --
   ;; confirm against the macro in :ukkoclot/tg/type-macros.
   #:message-entity
   #:make-message-entity
   #:message-entity-p
   #:copy-message-entity
   #:message-entity-type
   #:message-entity-offset
   #:message-entity-length
   #:message-entity-url
   #:message-entity-user
   #:message-entity-language
   #:message-entity-custom-emoji-id

   ;; Hand-written helper defined in this file.
   #:message-entity-extract))
(in-package :ukkoclot/tg/message-entity)

;; Declares the `message-entity' type: a typed span inside a message text.
;; Each slot spec is (NAME TYPE) or (NAME TYPE DEFAULT).
;; NOTE(review): the trailing NIL looks like a default that makes the slot
;; optional -- confirm against `define-tg-type'.
;; `offset' and `length' are treated as counts of UTF-16 code units by
;; `message-entity-extract' in this file.
(define-tg-type message-entity
  (type message-entity-type)
  (offset integer)
  (length integer)
  (url (or string null) nil)
  (user (or user null) nil)
  (language (or string null) nil)
  (custom-emoji-id (or string null) nil))

;; Guard evaluated when this file is loaded: the UTF-16 width computation
;; below relies on characters having code points below #x110000, so signal
;; an error on implementations where CHAR-CODE-LIMIT has any other value.
(when (/= char-code-limit #x110000)
  (error "Some UTF-16 fuckery assumes that system chars are UTF-32"))

(defun utf16-width (ch)
  "Return how many UTF-16 code units the character CH occupies:
2 when its code is #x10000 or above, otherwise 1."
  (let ((code (char-code ch)))
    (if (>= code #x10000)
        2
        1)))

(defun message-entity-extract (entity text)
  "Return the part of the string TEXT that ENTITY refers to.

ENTITY's OFFSET and LENGTH slots are interpreted as counts of UTF-16 code
units, while TEXT is indexed by character, so both ends are translated by
walking TEXT and summing `utf16-width' of each character.

Returns the empty string when LENGTH is zero.  An OFFSET that falls in the
middle of a two-unit character starts at that character; an OFFSET past
the end of TEXT yields an empty result.  The extracted range stops at the
first character boundary where at least LENGTH units have been consumed,
or at the end of TEXT."
  ;; Rename the slots locally so the variable LENGTH does not shadow the
  ;; reader's idea of the function LENGTH used below.
  (with-slots ((unit-count length) (unit-offset offset)) entity
    (if (zerop unit-count)
        ""
        (let* ((text-end (length text))
               ;; Character index of the first character that begins at, or
               ;; straddles, UTF-16 position UNIT-OFFSET.
               (start (loop with units-before = 0
                            for pos from 0 below text-end
                            for width = (utf16-width (char text pos))
                            when (or (> (+ units-before width) unit-offset)
                                     (= units-before unit-offset))
                              return pos
                            do (incf units-before width)
                            finally (return text-end)))
               ;; Character index just past the last character needed to
               ;; cover UNIT-COUNT code units, counted from START.
               (end (loop with units-taken = 0
                          for pos from start below text-end
                          when (>= units-taken unit-count)
                            return pos
                          do (incf units-taken (utf16-width (char text pos)))
                          finally (return text-end))))
          (subseq text start end)))))