;;;; CL-SDM - Opinionated Extra Batteries for Common Lisp
;;;; Copyright (C) 2021-2025 Remilia Scarlet <remilia@posteo.jp>
;;;;
;;;; This program is free software: you can redistribute it and/or modify it
;;;; under the terms of the GNU Affero General Public License as published by
;;;; the Free Software Foundation, either version 3 of the License, or (at your
;;;; option) any later version.
;;;;
;;;; This program is distributed in the hope that it will be useful, but WITHOUT
;;;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;;;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
;;;; License for more details.
;;;;
;;;; You should have received a copy of the GNU Affero General Public License
;;;; along with this program.  If not, see <https://www.gnu.org/licenses/>.
(in-package :cl-sdm)

(defining-consts
  (+ascii-whitespace+
   (loop for code from 9 to 13 collect (code-char code) into ret
         finally (return (append (list #\Space) ret))))

  (+basic-whitespace+ '(#\Newline #\Return #\Space #\Tab #\Page))

  (+extra-whitespace+
   '(#\Newline #\Return #\Space #\Tab #\Page #\Zero_Width_Space #\No-Break_Space #\Figure_Space
     #-ccl #\Ogham_Space_Mark
     #-ccl #\En_Quad
     #-ccl #\Em_Quad
     #-ccl #\En_Space
     #-ccl #\Em_Space
     #-ccl #\Three-Per-Em_Space
     #-ccl #\Four-Per-Em_Space
     #-ccl #\Six-Per-Em_Space
     #-ccl #\Punctuation_Space
     #-ccl #\Thin_Space
     #-ccl #\Hair_Space
     #-ccl #\Narrow_No-Break_Space
     #-ccl #\Medium_Mathematical_Space
     #-ccl #\Ideographic_Space)))

(define-typed-fn trim-whitespace ((simple-string string))
    (simple-string t)
  "Removes all whitespace from the beginning and end of STRING.  This version
only checks for the characters in +BASIC-WHITESPACE+.  STRING should be a
SIMPLE-STRING."
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (coerce (string-trim +basic-whitespace+ string) 'simple-string))

(define-typed-fn trim-whitespace! ((string string))
    (string t)
  "Removes all whitespace from the beginning and end of STRING.  This version
includes a lot of extra characters, not just #\Space, #\Tab, #\Page, #\Newline,
and #\Return."
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0))
           #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
  (string-trim +extra-whitespace+ string))

(define-compiler-macro strings+ (&whole form &rest strings)
  (labels
      ((convertable? (x)
         (and (constantp x)
              (typep x '(or symbol string character pathname)))))
    (cond
      ((every #'convertable? strings)
       ;; We can convert this to a much faster by doing everything ahead of time.
       (with-output-to-string (out)
         (loop for thing in strings
               do (cl:write-string (etypecase thing
                                     (symbol (cl:string thing))
                                     (string thing)
                                     (character (string thing))
                                     (pathname (uiop:native-namestring thing)))
                                   out))))

      ((some #'(lambda (x)
                 (and (constantp x)
                      (typep x '(or symbol string character pathname))))
             strings)
       ;; We can precalculate a few things here.
       (with-gensyms (out val)
         `(coerce (with-output-to-string (,out)
                    ,@(loop for thing in strings
                            if (and (constantp thing)
                                    (typep thing '(or symbol string character pathname)))
                              collect `(cl:write-string
                                        ,(etypecase thing
                                           (symbol (cl:string thing))
                                           (string thing)
                                           (character (string thing))
                                           (pathname (uiop:native-namestring thing)))
                                        ,out)
                            else
                              collect `(let ((,val ,thing))
                                         (typecase ,val
                                           (string (cl:write-string ,val ,out))
                                           (character (write-char ,val ,out))
                                           (pathname (cl:write-string (uiop:native-namestring ,val) ,out))
                                           (t (cl:write-string (write-to-string ,val) ,out))))))
                  'simple-string)))

      ;; Do the full call.
      (t form))))

(declaim (ftype (function (&rest t) simple-string) strings+))
(defun strings+ (&rest strings)
  "Concatenates any number of strings or objects (written as strings) together.
This always returns a SIMPLE-STRING."
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (coerce (with-output-to-string (out)
            (loop for str in strings
                  do (typecase str
                       (string (cl:write-string str out))
                       (character (write-char str out))
                       (pathname (cl:write-string (uiop:native-namestring str) out))
                       (t (cl:write-string (write-to-string str) out)))))
          'simple-string))

(declaim (ftype (function (string string &key
                                  (:start1 fixnum) (:end1 (or null fixnum))
                                  (:start2 fixnum) (:end2 (or null fixnum)))
                          boolean)
                caseless-string=)
         (inline caseless-string=))
(defun caseless-string= (str1 str2 &key (start1 0) end1 (start2 0) end2)
  "Compares STR1 to STR2 in a case-insensitive way.  START1, END1, START2, and
END2 are the same as for STRING=."
  (declare (type string str1 str2)
           (type fixnum start1 start2)
           (type (or null fixnum) end1 end2)
           (optimize speed (debug 1) (safety 1) (compilation-speed 0))
           #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
  (string= (string-downcase str1) (string-downcase str2)
           :start1 start1 :end1 end1
           :start2 start2 :end2 end2))

(define-compiler-macro caseless-string= (&whole form str1 str2 &key (start1 0) end1 (start2 0) end2)
  (if (and (constantp str1)
           (stringp str1)
           (every #'(lambda (x)
                      (char= (char-downcase x) x))
                  str1))
      ;; STR1 is already a constant, downcased string.
      (if (and (constantp str2)
               (stringp str2)
               (every #'(lambda (x)
                          (char= (char-downcase x) x))
                      str2))
          ;; STR2 is already a constant, downcased string.  We can just call STRING= without any extra calls.
          `(string= ,str1 ,str2 :start1 ,start1 :end1 ,end1 :start2 ,start2 :end2 ,end2)

          ;; Only STR1 is a constant, downcased string.
          `(string= ,str1 (string-downcase ,str2) :start1 ,start1 :end1 ,end1 :start2 ,start2 :end2 ,end2))

      ;; STR1 is NOT already a constant, downcased string.  Is STR2?
      (if (and (constantp str2)
               (stringp str2)
               (every #'(lambda (x)
                          (char= (char-downcase x) x))
                      str2))
          ;; STR2 is already a constant, downcased string, but STR1 isn't.
          `(string= (string-downcase ,str1) ,str2 :start1 ,start1 :end1 ,end1 :start2 ,start2 :end2 ,end2)

          ;; Do the full call
          form)))

(define-typed-fn caseless-char= ((character char1 char2))
    (boolean t)
  "Compares CHAR1 to CHAR2 in a case-insensitive way."
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0))
           #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
  (char= (char-downcase char1) (char-downcase char2)))

(declaim (type list +pretty-print-bytes-suffixes+))
(defconst +pretty-print-bytes-suffixes+
  '("Bytes" "KiB" "MiB" "GiB" "TiB" "PiB" "EiB" "ZiB" "YiB"))

(declaim (type list +pretty-print-bytes-base-10-suffixes+))
(defconst +pretty-print-bytes-base-10-suffixes+
  '("Bytes" "KB" "MB" "GB" "TB" "PB" "EB" "ZB" "YB"))

(declaim (type list +pretty-print-bytes-simple-suffixes+))
(defconst +pretty-print-bytes-simple-suffixes+
  '("B" "K" "M" "G" "T" "P" "E" "Z" "Y"))

(defun pretty-print-bytes (size &key always-show-in-bytes (decimal-places 2) (padding 0) (pad-char #\Space) (suffix-type :base-2))
  "Prints SIZE in such a way that it can be a human readable size.
This always outputs using binary units (e.g., KiB instead of KB).  Optional
padding can be placed on the left side of the output to ensure the resulting
string is at least PADDING characters wide, including the suffix.

SUFFIX-TYPE, and can be one of the following:
  * :BASE-2 - Returns a binary suffix, e.g. KiB
  * :BASE-10 - Returns a base-10 suffix, e.g. KB (the calculations still happen in base 2!)
  * :SIMPLE - Returns a single character suffix, e.g. K"
  (declare (type integer size)
           (type fixnum decimal-places)
           (type fixnum padding)
           (type character pad-char)
           (optimize speed (debug 1) (safety 1) (compilation-speed 0))
           #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
  (check-type suffix-type (member :base-2 :base-10 :simple))

  (let ((mag 0)
        (human-size 0)
        (main-fmt-str "")
        (byte-fmt-str "")
        (human-fmt-str "")
        (div 1024))
    (declare (type fixnum div)
             (type integer mag)
             (type simple-string main-fmt-str byte-fmt-str human-fmt-str))
    (setf main-fmt-str (format nil "~~~d,1,0,'~a@a" padding pad-char))
    (setf byte-fmt-str (format nil "~~:d ~a"
                               (car
                                (ecase suffix-type
                                  (:base-2 +pretty-print-bytes-suffixes+)
                                  (:base-10 +pretty-print-bytes-base-10-suffixes+)
                                  (:simple +pretty-print-bytes-simple-suffixes+)))))
    (setf human-fmt-str (format nil "~~,~df ~~a" decimal-places))

    (cond
      (always-show-in-bytes
       (format nil main-fmt-str (format nil byte-fmt-str size)))

      (t (setf mag (cond
                     ((> size 0) (truncate (log size div)))
                     ((< size 0) (truncate (log (abs size) div)))
                     (t 0)))

         (setf human-size (cond
                            ((> size 0) (float (/ size (ash 1 (* mag 10)))))
                            ((< size 0) (- (float (/ (abs size) (ash 1 (* mag 10))))))
                            (t 0)))

         (format nil main-fmt-str
                 (if (= mag 0)
                     (format nil byte-fmt-str (truncate human-size))
                     (if (= decimal-places 0)
                         (format nil "~a ~a" (truncate human-size)
                                 (nth mag (ecase suffix-type
                                            (:base-2 +pretty-print-bytes-suffixes+)
                                            (:base-10 +pretty-print-bytes-base-10-suffixes+)
                                            (:simple +pretty-print-bytes-simple-suffixes+))))
                         (format nil human-fmt-str human-size
                                 (nth mag (ecase suffix-type
                                            (:base-2 +pretty-print-bytes-suffixes+)
                                            (:base-10 +pretty-print-bytes-base-10-suffixes+)
                                            (:simple +pretty-print-bytes-simple-suffixes+)))))))))))

(declaim (ftype (function (string character &key (:as-list t) (:dont-drop t) (:remove-empty t)
                                  (:limit (or null t/uint32)))
                          (or simple-vector list))
                split-string))
(defun split-string (str char &key as-list dont-drop remove-empty limit)
  "Splits a string STR at each instance of CHAR, returning a sequence containing
each substring.  If AS-LIST is non-NIL, the return value is a list, otherwise
the return value is a vector.

If DONT-DROP is non-NIL, then CHAR is included in each substring.  If
REMOVE-EMPTY, then empty strings are removed from the results.

If LIMIT is supplied, it must be a positive integer.  The string will be split
such that up to LIMIT elements are returned."
  (declare (type string str)
           (type character char)
           (type (or null t/uint32) limit)
           (optimize speed (debug 1) (safety 1) (compilation-speed 0)))

  (when (= (length str) 0)
    (return-from split-string (if as-list nil #())))

  (let ((all-pos nil))
    (declare (type (or null (vector fixnum *)) all-pos)
             (dynamic-extent all-pos))
    (setf all-pos (make-array 1 :element-type 'fixnum
                                :adjustable t :fill-pointer 1
                                :initial-element 0))

    (let ((last-pos 0))
      (declare (dynamic-extent last-pos))

      ;; Get the positions of all instances of CHAR
      (loop for pos = (position char str :start (1+ last-pos) :test #'char=)
            while pos do
              (setf last-pos pos)
              (vector-push-extend pos all-pos)

              ;; Check limit
              (when (and limit (>= (length all-pos) limit))
                (loop-finish)))

      ;; Split the string using the positions we found
      (setf last-pos 0)
      (loop for pos across (subseq all-pos 1)
            for idx fixnum from 0
            with ret = (make-array (length all-pos) :element-type 'string :initial-element "") do
              (setf (aref ret idx) (subseq str last-pos (if dont-drop (1+ pos) pos)))
              (setf last-pos (1+ pos))
            finally
               (setf (aref ret (1- (length ret))) (subseq str last-pos))
               (let ((real-ret (if remove-empty
                                   (delete "" ret :test #'string=)
                                   ret)))
                 (return (if as-list (coerce real-ret 'list) real-ret)))))))

(declaim (ftype (function (string list &key (:as-list t) (:dont-drop t) (:remove-empty t)
                                  (:limit (or null t/uint32)))
                          (or simple-vector list))
                split-string*))
(defun split-string* (str char-bag &key as-list dont-drop remove-empty limit)
  "Splits a string STR at each instance of any characters in CHAR-BAG, returning a
sequence containing
each substring.  If AS-LIST is non-NIL, the return value is a list, otherwise
the return value is a vector.

If DONT-DROP is non-NIL, then the character found in CHAR-BAG is included in
each substring.  If REMOVE-EMPTY, then empty strings are removed from the
results.

If LIMIT is supplied, it must be a positive integer.  The string will be split
such that up to LIMIT elements are returned."
  (declare (type string str)
           (type list char-bag)
           (type (or null t/uint32) limit)
           (optimize speed (debug 1) (safety 1) (compilation-speed 0)))

  (when (= (length str) 0)
    (return-from split-string* (if as-list nil #())))

  (let ((all-pos nil))
    (declare (type (or null (vector fixnum *)) all-pos)
             (dynamic-extent all-pos))
    (setf all-pos (make-array 1 :element-type 'fixnum
                                :adjustable t :fill-pointer 1
                                :initial-element 0))

    (let ((last-pos 0))
      (declare (dynamic-extent last-pos))

      ;; Get the positions of all instances of characters in CHAR-BAG
      (loop for pos = (muffling
                        (position-if #'(lambda (x)
                                         (declare (type character x))
                                         (find x char-bag :test #'char=))
                                     str
                                     :start (1+ last-pos)))
            while pos do
              (setf last-pos pos)
              (vector-push-extend pos all-pos)

              ;; Check limit
              (when (and limit (>= (length all-pos) limit))
                (loop-finish)))

      ;; Split the string using the positions we found
      (setf last-pos 0)
      (loop for pos across (subseq all-pos 1)
            for idx fixnum from 0
            with ret = (make-array (length all-pos) :element-type 'string :initial-element "") do
              (setf (aref ret idx) (subseq str last-pos (if dont-drop (1+ pos) pos)))
              (setf last-pos (1+ pos))
            finally
               (setf (aref ret (1- (length ret))) (subseq str last-pos))
               (let ((real-ret (if remove-empty
                                   (delete "" ret :test #'string=)
                                   ret)))
                 (return (if as-list (coerce real-ret 'list) real-ret)))))))

(defun print-indented-string (text indent-length
                              &key (max-width 80) (stream *standard-output*) indent-first-line (indent-char #\Space))
  (declare (type simple-string text)
           (type t/uint32 indent-length max-width)
           (type character indent-char)
           (type stream stream)
           (type boolean indent-first-line)
           (optimize speed (debug 1) (safety 1) (compilation-speed 0)))

  (let ((pos 0)
        (max-line-width 0)
        (last-space-pos 0)
        (increment 0)
        (text-block "")
        (on-first-line (not indent-first-line)))
    (declare (type t/uint32 pos max-line-width increment last-space-pos)
             (type simple-string text-block)
             (type boolean on-first-line)
             (dynamic-extent pos max-line-width last-space-pos increment on-first-line))

    ;; POS must always be under the length of our string
    (loop while (< pos (length text)) do
      ;; We have extra to do if we're not on the first
      ;; line (or INDENT-FIRST-LINE is non-NIL)
      (cond
        ;; Subtract the size of the indent from the maximum width.
        ;; This is how much text we can print not including the
        ;; indent.
        ((or indent-first-line (not on-first-line))
         (setf max-line-width (- max-width indent-length))
         (dotimes (i indent-length)
           (declare (dynamic-extent i))
           (write-char indent-char stream)))

        ;; If we don't indent the first line, that line's maximum
        ;; width is always the same as MAX-WIDTH
        (t (setf max-line-width max-width)))

      ;; We're not on the first line after this
      (setf on-first-line nil)

      ;; See if we can write out the rest of the string on
      ;; this line or not
      (when (>= (+ pos max-line-width) (length text))
        (cl:write-string (string-trim '(#\Space) (subseq text pos)) stream)
        (return-from print-indented-string))

      ;; We couldn't write out the rest of the string, so
      ;; get the next block of text.
      (setf text-block (subseq text pos (+ pos max-line-width)))

      (cond
        ;; If this block doesn't contain a space, just write
        ;; out the entire block.  If it wraps, it
        ;; wraps... we don't handle hyphenation.
        ((not (find #\Space text-block :test #'char=))
         (cl:write-string (string-trim '(#\Space) text-block) stream)
         (write-char #\Newline stream)

         ;; Update the position
         (incf pos max-line-width))

        ;; The block does have a space.  Find the position of the
        ;; final space in the block.
        (t (setf last-space-pos (position #\Space text-block :from-end t :test #'char=))
           (setf increment max-line-width)

           (cond
             ;; If the only space is the very first
             ;; character, output the block (minus the space)
             ;; and then go to the bottom of the loop
             ((= last-space-pos 0)
              (cl:write-string (subseq text-block 1) stream)
              (write-char #\Newline stream)
              (incf pos max-line-width))

             ;; Possibly update the block so that it goes up to the last
             ;; space.
             (t (when (not (char= (elt text-block (1- max-line-width)) #\Space))
                  (setf text-block (subseq text pos (+ pos last-space-pos)))
                  (setf increment (1+ last-space-pos)))

                ;; Write out the block
                (cl:write-string (string-trim '(#\Space) text-block) stream)
                (write-char #\Newline stream)

                ;; Do some housekeeping
                (incf pos increment))))))))

(declaim (inline indent-string))
(defun indent-string (text indent-length &key (max-width 80) indent-first-line (indent-char #\Space))
  (with-output-to-string (out)
    (print-indented-string text indent-length
                           :max-width max-width :stream out
                           :indent-first-line indent-first-line :indent-char indent-char)))

(declaim (ftype (function (string string string &key (:test function)) simple-string)
                string-replace))
(defun string-replace (string part replacement &key (test #'char=))
  "Returns a new string in which all the occurences of the part is replaced with
replacement.  STRING, PART, and REPLACEMENT should all be STRINGs.

Taken from The Common Lisp Cookbook
http://cl-cookbook.sourceforge.net/index.html"
  (declare (type string string part replacement)
           (type function test)
           (optimize speed (debug 1) (safety 1) (compilation-speed 0))
           #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))

  (coerce (with-output-to-string (out)
            (loop with part-length = (length part)
                  for old-pos fixnum = 0 then (+ pos part-length)
                  for pos = (search part string
                                    :start2 old-pos
                                    :test test)
                  do (cl:write-string string out
                                      :start old-pos
                                      :end (or pos (length string)))
                  when pos do (cl:write-string replacement out)
                    while pos))
          'simple-string))

(defun string-replace-with (string replacements &key (test #'char=))
  "Returns a new string in which all the occurences of the parts in REPLACEMENT
is replaced with their correspondingreplacement.  STRING should be a STRING,
while REPLACEMENTS shoud be a LIST or VECTOR of CONSes where the CAR is the part
to be replaced and the CDR is the replacement.  The CAR and CDR can both be
either a CHARACTER, a character code, or a STRING."
  (declare (optimize speed (safety 1) (debug 1) (compilation-speed 0)))
  ;;#+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
  (check-type string string)
  (check-type replacements (or vector list))
  (check-type test function)

  (let ((str (coerce string 'simple-string)))
    (declare (type simple-string str))
    (with-output-to-string (out)
      (loop with len fixnum = (muffling (length replacements))
            for i fixnum from 0 below len
            for pair = (muffling (elt replacements i))
            for part string = (etypecase (car pair)
                                (string (car pair))
                                (character (string (the character (car pair))))
                                (integer (string (code-char (car pair)))))
            for repl string = (etypecase (cdr pair)
                                (string (cdr pair))
                                (character (string (the character (cdr pair))))
                                (integer (string (code-char (cdr pair)))))
            do (loop for old-pos fixnum = 0 then (+ pos (length part))
                     for pos = (muffling (search part str :start2 old-pos :test test))
                     do (cl:write-string str out :start old-pos :end (or pos (length str)))
                     when pos
                       do (cl:write-string repl out)
                     while pos
                     finally (setf str (get-output-stream-string out)))))
    str))

(defparameter *string-sanitize*
  '#((#x1b . "ESC")
     (#x07 . "BEL")
     (#x00 . "NUL")
     (#\^ . "CFLEX")
     (#x08 . "BS")
     (#x09 . "HT")
     (#X0B . "VT")
     (#X0C . "FF")
     (#X1A . "SUB")
     (#x01 . "SOH")
     (#x02 . "STX")
     (#x03 . "ETX")
     (#x04 . "EOT")
     (#x05 . "ENQ")
     (#x06 . "ACK")
     (#X0E . "SO")
     (#X0F . "SI")
     (#X7F . "DEL")))

(defparameter *string-sanitize-extra*
  '#((#x0A . "LF")
     (#x0D . "CR")))

(define-typed-fn string-sanitize ((string string) &optional leave-extra)
    (simple-string)
  "Removes meta characters (ESC, BEL, etc.) and the NULL character from STRING,
and replaces them with their names.  This returns a new SIMPLE-STRING.  If
LEAVE-EXTRA is truthy, then Carriage Return and Line Feed are also removed.

The definitions for what to remove are stored in *STRING-SANITIZE* and
*STRING-SANITIZE-EXTRA*."
  (declare (optimize (speed 3) (debug 1) (safety 1) (compilation-speed 0)))
  (let ((ret (string-replace-with string *string-sanitize*)))
    (unless leave-extra
      (setf ret (string-replace-with ret *string-sanitize-extra*)))
    ret))

(declaim (ftype (function (simple-string (or character simple-string) &key (:test function)) boolean)
                string-starts-with)
         (inline string-starts-with))
(defun string-starts-with (string part &key (test #'char=))
  "Returns T if STRING begins with PART, or NIL otherwise."
  (declare (type simple-string string)
           (type function test)
           (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (etypecase part
    (character (equal (search (coerce (string part) 'simple-string) string :test test) 0))
    (string (equal (search part string :test test) 0))))

(declaim (ftype (function (simple-string (or character simple-string) &key (:test function)) boolean)
                string-ends-with)
         (inline string-ends-with))
(defun string-ends-with (string part &key (test #'char=))
  "Returns T if STRING has PART at the end of it, or NIL otherwise.  PART can be
a string or a character."
  (declare (type simple-string string)
           (type function test)
           (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (etypecase part
    (character
     (equal (search (coerce (string part) 'simple-string) string :test test :from-end t)
            (- (length string) (length (string part)))))

    (string (equal (search part string :test test :from-end t)
                   (- (length string) (length part))))))

(defmacro string-case (str &body clauses)
  "Checks to see if STR matches any of CLAUSES, and runs the body of the
matching clause.  If no clause matches, and an OTHERWISE clause exists, the
OTHERWISE clause is instead run.

CLAUSES is structured the same as in a normal CASE statement."
  (with-gensyms (val)
    (loop for clause in clauses
          with forms = ()
          do (typecase (car clause)
               (string
                (push (list* (list 'string= val (car clause))
                             (cdr clause))
                      forms))

               (list
                (push (list*
                       (list* 'or (mapcar
                                   #'(lambda (sub-str)
                                       (unless (stringp sub-str)
                                         (error "Clause condition list is invalid: ~a" (car clause)))
                                       (list 'string= val sub-str))
                                   (car clause)))
                       (cdr clause))
                      forms))

               (symbol
                (unless (string= (symbol-name (car clause)) "OTHERWISE")
                  (error "Invalid clause condition: ~a" (car clause)))
                (push (list* 't (cdr clause)) forms))

               (otherwise
                (error "Clause condition is not a string, list of strings, or OTHERWISE")))
          finally (return `(let ((,val ,str)) (cond ,@(reverse forms)))))))

(define-typed-fn empty-string-p ((simple-string str))
    (boolean t)
  "Returns T if STR is an empty string, or NIL otherwise.  This disregards
whitespace, so a string of ' ' is still considered empty by this function.

This version only checks for #\Newline, #\Return, #\Space, #\Tab, and #\Page.

STR should be a SIMPLE-STRING."
  (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0)))
  (string= (trim-whitespace str) ""))

(define-typed-fn empty-string-p! ((string str))
    (boolean t)
  "Returns T if STR is an empty string, or NIL otherwise.  This disregards
whitespace, so a string of ' ' is still considered empty by this function.

This version includes a lot of extra characters, not just #\Space, #\Tab,
#\Page, #\Newline, and #\Return."
  (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0)))
  (string= (trim-whitespace! str) ""))

(defmacro char-in-range-p (char ranges &rest extras)
  "Checks to see if CHAR is within one or more ranges.  Each range should be a
CONS where the CAR is the lower bounds and the CDR is the upper.  Any additional
single character checks can be specified in EXTRAS as a list.

Characters for the upper and lower bounds, as well as individual characters, can
be specified either as a character literal, a string, or an integer."
  (with-gensyms (c)
    ;; Get the character's code once
    `(let ((,c (char-code ,char)))
       (or

        ;; Build each range
        ,@(loop for range in ranges
                if (typep range 'cons)
                  collect `(and

                            ;; Lower bound
                            (>= ,c (char-code
                                    ,(typecase (car range)
                                       (character
                                        (car range))

                                       (string
                                        (if (> (length (car range)) 1)
                                            (error "Range string too long")
                                            (elt (car range) 0)))

                                       (integer
                                        (code-char (car range)))

                                       (t (error "Invalid range expression: ~a" range)))))

                            ;; Upper bound
                            (<= ,c (char-code
                                    ,(typecase (cdr range)
                                       (character
                                        (cdr range))

                                       (string
                                        (if (> (length (cdr range)) 1)
                                            (error "Range string too long")
                                            (elt (cdr range) 0)))

                                       (integer
                                        (code-char (cdr range)))

                                       (t (error "Invalid range expression"))))))
                    into ret ;; End of LOOP's COLLECT block

                             ;; Whoops, not a CONS cell
                else do (error "RANGES must be a list of CONSes")
                finally (return ret))

        ;; Append extra single-character checks
        ,@(loop for extra in extras
                nconc `((= ,c (char-code
                               ;; Expand each extra character check
                               ,(typecase extra
                                  (character
                                   extra)

                                  (string
                                   (if (> (length extra) 1)
                                       (error "Extra is too long")
                                       (elt extra 0)))

                                  (integer
                                   (code-char extra))

                                  (t (error "Invalid extra expression (type: ~a)"
                                            (type-of extra))))))))))))
(trivial-indent:define-indentation char-in-range-p (4 2 2))

(declaim (ftype (function ((or character string) &rest T) simple-string) strings-join))
(defun strings-join (joiner &rest strings)
  "Concatenates STRINGS together with JOIN placed between each string.  JOINER
can be a CHARACTER or a STRING.  JOINER is never appended to the end of the
resulting string."
  (declare (type (or character string) joiner)
           (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (let ((joiner-fn (etypecase joiner
                     (character #'write-char)
                     (string #'cl:write-string))))
    (declare (type function joiner-fn))
    (with-output-to-string (out)
      (loop with len fixnum = (length strings)
            for str string in strings
            for i fixnum from 1 below len
            do (cl:write-string str out)
               (funcall joiner-fn joiner out)
            finally (cl:write-string (or (seq-last strings) "")
                                     out)))))

(declaim (ftype (function ((or character string) &rest T) simple-string) strings-join!))
(defun strings-join! (joiner &rest strings)
  "Concatenates STRINGS together with JOIN placed between each string.  JOINER
can be a CHARACTER or a STRING.  Unlike STRINGS-JOIN, this function always
appends JOINER onto the end of the resulting string."
  (declare (type (or character string) joiner)
           (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (let ((joiner-fn (etypecase joiner
                     (character #'write-char)
                     (string #'cl:write-string))))
    (declare (type function joiner-fn))
    (with-output-to-string (out)
      (loop for str string in strings
            do (cl:write-string str out)
               (funcall joiner-fn joiner out)))))

(declaim (ftype (function ((or character string) vector) simple-string) strings-join-vector))
(defun strings-join-vector (joiner vector)
  "Concatenates VECTOR together with JOIN placed between each string.  JOINER
can be a CHARACTER or a STRING.  JOINER is never appended to the end of the
resulting string.  VECTOR must contain only strings."
  (declare (type vector vector)
           (type (or character string) joiner)
           (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (let ((joiner-fn (etypecase joiner
                     (character #'write-char)
                     (string #'cl:write-string))))
    (declare (type function joiner-fn))
    (with-output-to-string (out)
      (loop with len fixnum = (length vector)
            for i of-type t/ufixnum from 1 below len
            for sidx of-type t/ufixnum from 0
            for str = (muffling (elt vector sidx))
            do (cl:write-string str out)
               (funcall joiner-fn joiner out)
            finally (cl:write-string (or (seq-last vector) "")
                                     out)))))

(declaim (ftype (function ((or character string) vector) simple-string) strings-join-vector!))
(defun strings-join-vector! (joiner vector)
  "Concatenates VECTOR together with JOIN placed between each string.  JOINER
can be a CHARACTER or a STRING.  Unlike STRINGS-JOIN, this function always
appends JOINER onto the end of the resulting string.  VECTOR must contain only
strings."
  (declare (type (or character string) joiner)
           (type vector vector)
           (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (let ((joiner-fn (etypecase joiner
                     (character #'write-char)
                     (string #'cl:write-string))))
    (declare (type function joiner-fn))
    (with-output-to-string (out)
      (loop for i of-type t/ufixnum from 0 below (length vector)
            for str = (muffling (elt vector i))
            do (cl:write-string str out)
               (funcall joiner-fn joiner out)))))

(declaim (ftype (function (string character) (values string (or string null) (or string null)))
                string-partition))
(defun string-partition (string char)
  "Searches for CHAR in STRING.  If CHAR is found, then this returns three values:
The string before CHAR, a string containing CHAR, and the substring after CHAR.

If CHAR is not found, this returns three values: the original string, NIL, and
NIL.

For example, calling (STRING-PARTITION \"foo*bar\" #\*) will yield the values
\"foo\", \"*\", and \"bar\".

Calling (STRING-PARTITION \"foobar\" #\*) will yield the values \"foobar\", NIL,
and NIL."
  (declare (type string string)
           (type character char)
           (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (let* ((char-pos (position char string :test #'char=))
         (left-part (if char-pos
                       (subseq string 0 char-pos)
                       string))
         (cstr (if char-pos (string char) nil))
         (right-part (if char-pos
                         (subseq string (1+ char-pos))
                         nil)))
    (declare (type (or null fixnum) char-pos))
    (values left-part cstr right-part)))

(define-typed-fn whitespace-char-p ((character char) &optional full?)
    (boolean t)
  "Returns T if CHAR is a whitespace character, or NIL otherwise.  If FULL? is
truthy, then all whitespace characters are checked (+EXTRA-WHITESPACE+),
otherwise only basic whitespace characters are checked (+BASIC-WHITESPACE+)."
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (if full?
      (if (find char +extra-whitespace+ :test #'char=) t nil)
      (if (find char +basic-whitespace+ :test #'char=) t nil)))

(define-typed-fn ascii-char-p ((character char))
    (boolean t)
  "Returns T if CHAR is an ASCII character, or NIL otherwise."
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (< (char-code char) 128))

(define-compiler-macro ascii-char-p (&whole form char)
  (if (and (constantp char)
           (characterp char))
      (< (char-code char) 128)
      form))

(define-typed-fn ascii-control-char-p ((character char))
    (boolean t)
  "Returns T if CHAR is an ASCII control character, or NIL otherwise."
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (let ((code (char-code char)))
    (or (< code #x20)
        (= code #x7F))))

(define-compiler-macro ascii-control-char-p (&whole form char)
  (if (and (constantp char)
           (characterp char))
      (let ((code (char-code char)))
        (or (< code #x20)
            (= code #x7F)))
      form))

(define-typed-fn ascii-lower-case-char-p ((character char))
    (boolean t)
  "Returns T if CHAR is an ASCII character and is lowercase, or NIL otherwise."
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (char<= #\a char #\z))

(define-compiler-macro ascii-lower-case-char-p (&whole form char)
  (if (and (constantp char)
           (characterp char))
      (char<= #\a char #\z)
      form))

(define-typed-fn ascii-upper-case-char-p ((character char))
    (boolean t)
  "Returns T if CHAR is an ASCII character and is uppercase, or NIL otherwise."
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (char<= #\A char #\Z))

(define-compiler-macro ascii-upper-case-char-p (&whole form char)
  (if (and (constantp char)
           (characterp char))
      (char<= #\A char #\Z)
      form))

(define-typed-fn ascii-alpha-p ((character char))
    (boolean t)
  "Returns T if CHAR is an ASCII letter (A-Z, a-z), or NIL otherwise."
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (and (ascii-char-p char)
       (alpha-char-p char)))

(define-compiler-macro ascii-alpha-p (&whole form char)
  (if (and (constantp char)
           (characterp char))
      (and (ascii-char-p char)
           (alpha-char-p char))
      form))

(define-typed-fn ascii-alphanumeric-p ((character char))
    (boolean t)
  "Returns T if CHAR is an ASCII character and alphanumeric, or NIL otherwise."
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (and (ascii-char-p char)
       (alphanumericp char)))

(define-compiler-macro ascii-alphanumeric-p (&whole form char)
  (if (and (constantp char)
           (characterp char))
      (and (ascii-char-p char)
           (alphanumericp char))
      form))

(define-typed-fn char->utf-8 ((character char))
    (t/uint32 t)
  "Converts CHAR to its encoded UTF-8 representation.

For example the character 道 (code point #x9053) is encoded to bytes (#xE9 #x81
#x93), and returned as integer 9667049."
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (let ((code (char-code char)))
    (declare (type t/uint32 code))
    (cond
      ((<= code #x7F)
       code)

      ((<= code #x07FF)
       (logior #b11000000 (ldb (byte 5 6) code)
               (ash (logior #b10000000 (ldb (byte 6 0) code)) 8)))

      ((<= code #xFFFF)
       (logior (logior #b11100000 (ldb (byte 4 12) code))
               (ash (logior #b10000000 (ldb (byte 6 6) code)) 8)
               (ash (logior #b10000000 (ldb (byte 6 0) code)) 16)))

      ((<= code #x10FFFF)
       (logior (logior #b11110000 (ldb (byte 3 18) code))
               (ash (logior #b10000000 (ldb (byte 6 12) code)) 8)
               (ash (logior #b10000000 (ldb (byte 6 6) code)) 16)
               (ash (logior #b10000000 (ldb (byte 6 0) code)) 24)))

      (t
       (muffling (error "Exceeded UTF-8"))))))
