summaryrefslogtreecommitdiff
path: root/src/strings.lisp
blob: eae8ec0d2082b2fb9c4365a82eff06a6dda15f0d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
;; SPDX-License-Identifier: EUPL-1.2
;; SPDX-FileCopyrightText: 2025 Uko Kokņevičs <perkontevs@gmail.com>
(defpackage :ukkoclot/strings
  (:documentation "String-oriented utilities.")
  (:use :c2cl :iterate)
  (:import-from :cl-unicode :general-category)
  (:export
   :escape-xml
   :is-tg-whitespace
   :is-tg-whitespace-str))
(in-package :ukkoclot/strings)

;; These are very inefficient but I don't care until I profile

(defun escape-xml (str &optional out)
  "Emit STR with its XML-special characters replaced by entity references.

When OUT is non-nil it is used as the destination stream and the return
value is whatever `escape-xml%' returns.  When OUT is `nil' the escaped
text is collected and returned as a fresh string."
  (cond
    ;; Caller supplied a stream: write straight into it.
    (out (escape-xml% str out))
    ;; No stream given: gather the output in a string stream instead.
    (t (with-output-to-string (sink)
         (escape-xml% str sink)))))

(defun escape-xml% (str out)
  "Worker for `escape-xml': write the escaped form of STR to the stream OUT.

The less-than sign, greater-than sign, ampersand and double quote are
written as &lt;, &gt;, &amp; and &quot; respectively; every other
character is copied through unchanged.  Returns `nil'."
  (flet ((entity-for (c)
           ;; The replacement text for C, or nil when C needs no escaping.
           (case c
             (#\< "&lt;")
             (#\> "&gt;")
             (#\& "&amp;")
             (#\" "&quot;")
             (otherwise nil))))
    (loop for c across str
          do (let ((entity (entity-for c)))
               (if entity
                   (write-string entity out)
                   (write-char c out))))))

(defun is-tg-whitespace (ch)
  "Return true when CH, taken alone, counts as whitespace for Telegram.

CH qualifies when its Unicode general category is one of Zs (space
separator), Zl (line separator), Zp (paragraph separator) or Cc
(control), or when it is U+2800 BRAILLE PATTERN BLANK."
  (let ((category (general-category ch)))
    (cond
      ;; Any of the separator categories, or a control character.
      ((find category '("Zs" "Zl" "Zp" "Cc") :test #'string=) t)
      ;; U+2800 is not in those categories but is matched explicitly.
      (t (= (char-code ch) #x2800)))))

(defun is-tg-whitespace-str (str)
  "Return true when every character of STR satisfies `is-tg-whitespace'.

Scanning stops at the first character that fails the test.  An empty
STR yields true, because there is no character that could fail."
  (loop for ch across str
        always (is-tg-whitespace ch)))