summaryrefslogtreecommitdiff
path: root/src/strings.lisp
blob: ab9f13c2b7d690e88e19436c985cda9e82c83fe7 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
;; SPDX-License-Identifier: EUPL-1.2
;; SPDX-FileCopyrightText: 2025 Uko Kokņevičs <perkontevs@gmail.com>
(defpackage :ukkoclot/src/strings
  (:documentation "String-oriented utilities.")
  (:use :c2cl :iterate)
  (:import-from :cl-unicode :general-category)
  (:import-from :serapeum :->)
  (:import-from :ukkoclot/src/streams :with-format-like-stream)
  (:export
   :escape-xml
   :is-tg-whitespace
   :is-tg-whitespace-str))
(in-package :ukkoclot/src/strings)

;; These are very inefficient but I don't care until I profile

(-> escape-xml (string &optional (or stream boolean)) (or string null))
(defun escape-xml (str &optional out-spec)
  "Emit STR with the characters <, >, & and the double quote replaced by entities.

All other characters are written through unchanged.  OUT-SPEC is passed on to
WITH-FORMAT-LIKE-STREAM, which supplies the stream the result is written to."
  (with-format-like-stream (out out-spec)
    (flet ((emit (ch)
             ;; ENTITY is the replacement text, or NIL when CH needs no escaping.
             (let ((entity (case ch
                             (#\< "&lt;")
                             (#\> "&gt;")
                             (#\& "&amp;")
                             (#\" "&quot;")
                             (otherwise nil))))
               (if entity
                   (write-string entity out)
                   (write-char ch out)))))
      (map nil #'emit str))))

(-> is-tg-whitespace (character) boolean)
(defun is-tg-whitespace (ch)
  "Return true when CH alone would count as whitespace for telegram.

That is the case when the Unicode general category of CH is Zs, Zl, Zp or Cc,
or when CH is the code point U+2800."
  (let ((category (general-category ch)))
    (or (some (lambda (blank-category) (string= category blank-category))
              '("Zs"                    ; Separator, space
                "Zl"                    ; Separator, line
                "Zp"                    ; Separator, paragraph
                "Cc"))                  ; Other, control
        ;; BRAILLE PATTERN BLANK is matched by code point, not by category.
        (= #x2800 (char-code ch)))))

(-> is-tg-whitespace-str (string) boolean)
(defun is-tg-whitespace-str (str)
  "Return true when a message consisting only of STR would be whitespace for telegram.

Every character of STR must satisfy IS-TG-WHITESPACE; an empty STR has no
offending character and therefore also yields true."
  (every #'is-tg-whitespace str))