;;;; CL-SDM - Opinionated Extra Batteries for Common Lisp
;;;; Copyright (C) 2021-2025 Remilia Scarlet <remilia@posteo.jp>
;;;;
;;;; This program is free software: you can redistribute it and/or modify it
;;;; under the terms of the GNU Affero General Public License as published by
;;;; the Free Software Foundation, either version 3 of the License, or (at your
;;;; option) any later version.
;;;;
;;;; This program is distributed in the hope that it will be useful, but WITHOUT
;;;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;;;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
;;;; License for more details.
;;;;
;;;; You should have received a copy of the GNU Affero General Public License
;;;; along with this program.  If not, see <https://www.gnu.org/licenses/>.
(in-package :cl-sdm-rsconf)

(defgeneric parse (source)
  (:documentation "Parses RSConf data from SOURCE."))

;; Just to save typing
(deftype t/char? ()
  '(or null character))

(defstruct (rsconf-parser (:copier nil)
                          (:predicate nil)
                          (:conc-name parser-))
  "Used internally to store state when parsing RSConf data."
  (stream nil :type (or null stream))
  (line 1 :type t/ufixnum)
  (col  1 :type t/ufixnum))

(define-condition rsconf-parse-error (rsconf-error)
  ((line
    :initarg :line
    :initform 0
    :type integer
    :reader error-line)

   (column
    :initarg :column
    :initform 0
    :type integer
    :reader error-column))
  (:report (lambda (err out)
             (format out "Near line ~:d column ~:d: ~a"
                     (error-line err)
                     (error-column err)
                     (apply #'format nil (simple-condition-format-control err)
                            (simple-condition-format-arguments err)))))
  (:documentation "Raised when an error occurs during parsing."))

(defmacro rsconf-parse-error (parser msg &rest fmt-args)
  `(error 'rsconf-parse-error :line (parser-line ,parser)
                              :column (parser-col ,parser)
                              :format-control ,msg
                              :format-arguments (list ,@fmt-args)))

(define-typed-fn parser-read-char ((rsconf-parser parser))
    (t/char? t)
  "Reads a single character and returns it.  This may return NIL if the end of the
stream is reached.  This handles #\Return and #\Page as well."
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (let ((ret (read-char (parser-stream parser) nil nil)))
    (if (and ret
             (or (char= ret #\Return)
                 (char= ret #\Page)))
        (rsconf-parse-error/return-found parser)
        (prog1 ret
          (cond
            ((and ret (char= ret #\Newline))
             (incf (parser-line parser))
             (setf (parser-col parser) 0))
            (t
             (incf (parser-col parser))))))))

(define-typed-fn parser-peek-char ((rsconf-parser parser))
    (t/char? t)
  "Shorthand for calling PEEK-CHAR with the peek-type of NIL."
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (peek-char nil (parser-stream parser) nil nil))

(define-typed-fn parser-read-char! ((rsconf-parser parser))
    (t/char? t)
  "Reads a single character and returns it.  This may return NIL if the end of the
stream is reached.  This allows #\Return and #\Page."
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (let ((ret (read-char (parser-stream parser) nil nil)))
    (prog1 ret
      (cond
        ((and ret (char= ret #\Newline))
         (incf (parser-line parser))
         (setf (parser-col parser) 0))
        (t
         (incf (parser-col parser)))))))

(define-typed-fn skip-whitespace ((rsconf-parser parser))
    (null t)
  "Skips whitespace until a non-whitespace character is found.  This handles
#\Return and #\Page characters according to the spec."
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (loop for c of-type t/char? = (parser-peek-char parser) do
    (cond
      ((null c)
       (loop-finish))
      ((whitespace-char-p c t)
       (case c
         ((#\Return #\Page)
          (rsconf-parse-error/return-found parser))
         (otherwise
          (parser-read-char parser))))
      (t
       (loop-finish))))
  nil)

(define-typed-fn skip-spaces ((rsconf-parser parser))
    (null t)
  "Skips space characters until a non-space character is found.  This only looks at
characters that does not change the line.  This handles #\Return and #\Page
characters according to the spec."
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (loop for c of-type t/char? = (parser-peek-char parser) do
    (cond
      ((null c)
       (loop-finish))
      ((find c +whitespace-except-newline+ :test #'char=)
       (parser-read-char parser))
      ((or (char= c #\Return)
           (char= c #\Page))
       (rsconf-parse-error/return-found parser))
      (t
       (loop-finish))))
  nil)

(define-typed-fn read-comment ((rsconf-parser parser))
    (null t)
  "Reads a comment line.  This handles #\Return and #\Page characters according to
the spec."
  (loop for c of-type t/char? = (parser-read-char parser)
        do (cond
             ((or (null c)
                  (char= c #\Newline))
              (loop-finish))
             ((or (char= c #\Return)
                  (char= c #\Page))
              (rsconf-parse-error/return-found parser))))
  nil)

(define-typed-fn skip-whitespace-and-comments ((rsconf-parser parser))
    (null t)
  "Skips whitespace and comments until something else is found.  This handles
#\Return and #\Page characters according to the spec."
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (loop for c of-type t/char? = (parser-peek-char parser) do
    (cond
      ((null c)
       (loop-finish))
      ((whitespace-char-p c t)
       (case c
         ((#\Return #\Page)
          (rsconf-parse-error/return-found parser))
         (otherwise
          (parser-read-char parser))))
      ((char= c #\;)
       (read-comment parser))
      (t
       (loop-finish))))
  nil)

(define-typed-fn read-number ((rsconf-parser parser))
    ((or integer double-float))
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (let ((radix 10) ;; Assume decimal until determined otherwise
        (float? nil)
        (exp-char #\e)
        (have-exp-char? nil)
        (first? t)
        (ret 0))
    (declare (type (integer 2 16) radix)
             (type boolean float? have-exp-char? first?)
             (type character exp-char)
             (type (or integer double-float) ret))

    ;; We'll build the number into a string, then convert it later.
    ;;
    ;; TODO this could probably be done more efficiently without the string?
    (with-output-to-string (str)
    (let ((c (parser-peek-char parser)))
      (declare (type t/char? c))
      ;; We should never have a null character, a #\Return, or a #\Page here, as
      ;; it should have been checked by the caller.
      (assert (not (null c)))
      (assert (not (char= c #\Return)))
      (assert (not (char= c #\Page)))

      ;; We now need to see if we're starting off with a special radix (e.g. #x
      ;; or #b), or a digit.
      (cond
        ((char= c #\#)
         (parser-read-char parser)
         (setf c (parser-peek-char parser))
         (when (null c)
           (rsconf-parse-error parser "Unexpected end of stream"))

         ;; We've handled NIL, so now figure out the radix.  The code below
         ;; calls PARSER-READ-CHAR which will handle #\Return and #\Page for us.
         (case (muffling (char-downcase c))
           (#\x ;; Hexadecimal
            (parser-read-char parser)
            (setf radix 16))
           (#\o ;; Octal
            (parser-read-char parser)
            (setf radix 8))
           (#\b ;; Binary
            (parser-read-char parser)
            (setf radix 2))
           (otherwise ;; Invalid radix
            (rsconf-parse-error parser "Invalid radix character: '~a'" c)))
         (setf first? nil))

        ;; Digit?  Sign?
        ((or (digit-char-p c)
             (char= c #\+)
             (char= c #\-))
         (write-char (parser-read-char parser) str)
         (setf first? nil))

        ;; Anything else is an error
        (t
         (rsconf-parse-error parser "Expected a digit or the start of a radix")))

      ;;  Now read characters.  The parser-read-char will automatically check
      ;;  for #\Return and #\Page as we go.
      (loop for c of-type t/char? = (parser-peek-char parser) do
        (cond
          ((or (null c)
               (whitespace-char-p c t))
           (parser-read-char parser)
           (loop-finish))

          ((or (char= c #\,)
               (char= c #\])
               (char= c #\}))
           (loop-finish))

          ((digit-char-p c radix) ;; Automatically handles the radix
           (parser-read-char parser)
           (write-char c str))

          ((char= c #\.)
           (parser-read-char parser)

           ;; Switch to float mode, or error if we're already reading floats or
           ;; are using the wrong radix.
           (cond
             (float?
              (rsconf-parse-error parser "Unexpected extra period in float"))
             ((not (= radix 10))
              (rsconf-parse-error parser "Floats must be in decimal"))
             (t
              (setf float? t)))
           (write-char c str))

          ((muffling
             (or (char= (char-downcase c) #\e)
                 (char= (char-downcase c) #\d)))

           (case c
             (#\e (setf exp-char #\e))
             (#\d (setf exp-char #\d)))

           (if float?
               (if have-exp-char?
                   (rsconf-parse-error parser "Unexpected character while reading float: '~a'" c)
                   (setf have-exp-char? t))
               (if have-exp-char?
                   (rsconf-parse-error parser "Unexpected character while reading integer: '~a'" c)
                   (setf have-exp-char? t
                         float? t)))

           ;; We downcase it here in case it's an #\e.  This gets around having
           ;; to do a STRING-DOWNCASE later on when we parse for a float.
           (parser-read-char parser)
           (write-char (muffling (char-downcase c)) str))

          ((or (char= c #\-)
               (char= c #\+))
           (if float?
               (unless have-exp-char?
                 (rsconf-parse-error parser "Unexpected sign character in float number: '~a'" c))
               (unless first?
                 (rsconf-parse-error parser "Unexpected sign character in integer: '~a'" c)))

           (parser-read-char parser)
           (write-char c str))

          (t
           ;; Everything else is an error
           (rsconf-parse-error parser "Bad numeric character: '~a'" c)))

        (setf first? nil))

      ;; Now try to parse the number.  If we hit a PARSE-ERROR, raise our own
      ;; parsing error instead, though this should never happen.
      (handler-case
          (setf ret
                (cond
                  (float?
                   (assert (and (characterp exp-char)
                                (or (char= exp-char #\e)
                                    (char= exp-char #\d)))
                           (exp-char)
                           "The EXP-CHAR assertion failed (the character was ~w)" exp-char)
                   (parse-float (get-output-stream-string str) :type 'double-float :exponent-character exp-char))
                  (t
                   (parse-integer (get-output-stream-string str) :radix radix))))
        (parse-error ()
          (rsconf-parse-error parser "Could not parse numeric value")))))
    ret))

(define-typed-fn read-escaped-utf-8-char ((rsconf-parser parser) (t/ufixnum start-line start-col))
    (character)
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (loop for num of-type t/char? = (parser-read-char parser)
        with code-str = (make-string-output-stream)
        do (cond
             ((null num)
              (rsconf-parse-error parser "Unterminated UTF-8 character in the string starting at ~
line ~:d column ~:d"))

             ((digit-char-p num 16) ;; Always in hex according to the spec
              (write-char num code-str))

             ((char= num #\})
              (loop-finish))

             (t
              (rsconf-parse-error parser "Invalid UTF-8 code in the string starting at ~
line ~:d column ~:d")))

        finally
           (handler-case
               (let ((code (parse-integer (get-output-stream-string code-str) :radix 16)))
                 (return-from read-escaped-utf-8-char (code-char code)))
             (error (err)
               (rsconf-parse-error parser "Could not parse escaped UTF-8 character in string starting at ~
line ~:d column ~:d: ~a" start-line start-col err)))))

(define-typed-fn read-string ((rsconf-parser parser))
    (simple-string)
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (let ((start-line (parser-line parser))
        (start-col (parser-col parser))
        (backslash? nil)
        (out (make-string-output-stream)))
    (declare (type t/ufixnum start-line start-col)
             (type boolean backslash?))
    ;; This must be true since the caller should only have peeked at the next
    ;; character.
    (assert (eql (parser-read-char parser) #\"))

    (loop for c of-type t/char? = (parser-read-char! parser) do
      (cond
        ((null c)
         (rsconf-parse-error parser "Unterminated string starting at line ~:d column ~:d (end of stream reached)"
                             start-line start-col))

        ((char= c #\")
         (cond
           (backslash?
            (write-char c out)
            (setf backslash? nil))
           (t
            (loop-finish))))

        ((char= c #\\)
         (cond
           (backslash?
            ;; Write one backslashe
            (write-char c out)
            (setf backslash? nil))
           (t
            (setf backslash? t))))

        ((char= c #\u)
         (cond
           (backslash?
            (cond
              ((eql (parser-read-char parser) #\{)
               (write-char (read-escaped-utf-8-char parser start-line start-col) out))
              (t
               ;; Write both the backslash and the #\u
               (write-char #\\ out)
               (write-char c out)))
            (setf backslash? nil))
           (t
            (write-char c out))))

        (t
         (write-char c out)
         (setf backslash? nil))))

    (get-output-stream-string out)))

(define-typed-fn read-key-name ((rsconf-parser parser))
    (simple-string)
  "Reads a key name.  This handles #\Return and #\Page charactgers according to the
spec."
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (skip-whitespace parser)

  (let ((ret ""))
    ;; Check for a quoted key name first.
    (cond
      ((eql (parser-peek-char parser) #\")
       (setf ret (read-string parser))
       (skip-spaces parser)
       (unless (eql (parser-read-char parser) #\:)
         (rsconf-parse-error parser "Expected a colon after the key name")))

      (t
       ;; Not a quoted name, so now we read characters to construct the name instead.
       (let ((out (make-string-output-stream)))
         (loop for c of-type t/char? = (parser-read-char parser) do
           (cond
             ((null c)
              (rsconf-parse-error parser "Unexpected end of stream"))

             ((char= c #\:)
              (loop-finish))

             ((or (char= c #\")
                  (char= c #\{)
                  (char= c #\})
                  (char= c #\[)
                  (char= c #\]))
              (rsconf-parse-error parser "Invalid character in unquoted key name: '~a'" c))

             (t
              (write-char c out))))

         (let ((str (get-output-stream-string out)))
           (when (find #\Newline str :test #'char=)
             (rsconf-parse-error parser "Unquoted key names cannot contain newlines"))
           (setf ret (trim-whitespace! str))))))

    (when (or (zerop (length ret))
              (every #'(lambda (x)
                         (whitespace-char-p x t))
                     ret))
      (rsconf-parse-error parser "Empty key name"))
    ret))

(define-typed-fn read-boolean ((rsconf-parser parser))
    (t/bool)
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (let ((first-char (muffling (char-downcase (parser-read-char parser)))))
    (declare (type character first-char))
    (assert (or (eql first-char #\t)
                (eql first-char #\f)))

    (case first-char
      (#\t
       (let* ((r (parser-read-char parser))
              (u (parser-read-char parser))
              (e (parser-read-char parser)))
         (when (or (null r) (null u) (null e))
           (rsconf-parse-error parser "Unexpected end of stream"))
         (if (and (caseless-char= r #\r)
                  (caseless-char= u #\u)
                  (caseless-char= e #\e))
             :true
             (rsconf-parse-error parser "Bad boolean value"))))

      (#\f
       (let* ((a (parser-read-char parser))
              (l (parser-read-char parser))
              (s (parser-read-char parser))
              (e (parser-read-char parser)))
         (when (or (null a) (null l) (null s) (null e))
           (rsconf-parse-error parser "Unexpected end of stream"))
         (if (and (caseless-char= a #\a)
                  (caseless-char= l #\l)
                  (caseless-char= s #\s)
                  (caseless-char= e #\e))
             :false
             (rsconf-parse-error parser "Bad boolean value"))))

      (otherwise
       (rsconf-parse-error parser "Unexpected character where a boolean was expected: '~a'" first-char)))))

(define-typed-fn read-null ((rsconf-parser parser))
    (t/null)
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (let ((first-char (muffling (char-downcase (parser-read-char parser)))))
    (declare (type character first-char))
    (assert (eql first-char #\n))

    (let* ((i (parser-read-char parser))
           (l (parser-read-char parser)))
      (when (or (null i) (null l))
        (rsconf-parse-error parser "Unexpected end of stream"))
      (if (and (caseless-char= i #\i)
               (caseless-char= l #\l))
          :null
          (rsconf-parse-error parser "Bad null value")))))

(define-typed-fn maybe-read-comma ((rsconf-parser parser))
    (null t)
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (skip-spaces parser)
  (when (eql (parser-peek-char parser) #\,)
    (parser-read-char parser))
  (skip-whitespace parser)
  nil)

(define-typed-fn read-value ((rsconf-parser parser))
    ((or null t/value))
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (skip-whitespace parser)
  (let ((ret nil))
    (loop for c of-type t/char? = (parser-peek-char parser) do
      (unless c
        (rsconf-parse-error parser "Expected a value"))

      (cond
        ((char= c #\;)
         (read-comment parser)
         (skip-whitespace parser))

        (t
         (setf ret (cond
                     ((or (digit-char-p c 10)
                          (char= c #\#)
                          (char= c #\+)
                          (char= c #\-))
                      (read-number parser))
                     ((char= c #\{)
                      (read-object parser))
                     ((char= c #\[)
                      (read-array parser))
                     ((char= c #\")
                      (read-string parser))
                     ((or (char= (muffling (char-downcase c)) #\t)
                          (char= (muffling (char-downcase c)) #\f))
                      (read-boolean parser))
                     ((char= (muffling (char-downcase c)) #\n)
                      (read-null parser))
                     (t
                      (rsconf-parse-error parser "Unexpected character where a value was expected: '~a'" c))))
         (loop-finish))))

    (maybe-read-comma parser)
    (skip-whitespace parser)
    ret))

(define-typed-fn read-object ((rsconf-parser parser))
    (hash-table)
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (let ((ret (make-hash-table :test 'equal)))
    (declare (type hash-table ret))

    (assert (eql (parser-read-char parser) #\{))
    (skip-whitespace parser)

    (loop for c of-type t/char? = (parser-peek-char parser) do
      (cond
        ((or (null c)
             (char= c #\}))
         (parser-read-char parser)
         (maybe-read-comma parser)
         (loop-finish))

        ((whitespace-char-p c t)
         (parser-read-char parser))

        ((char= c #\;)
         (read-comment parser)
         (skip-whitespace parser))

        ((or (char= c #\")
             (not (or (char= c #\{)
                      (char= c #\})
                      (char= c #\[)
                      (char= c #\]))))
         (let ((key (read-key-name parser))
               (val (read-value parser)))
           (declare (type simple-string key)
                    (type (or null t/value) val))
           (if (null val)
               (rsconf-parse-error parser "Unexpected end of stream")
               (setf (gethash key ret) val)))
         (skip-whitespace parser))

        (t
         (rsconf-parse-error parser "Expected a key or the end of an object"))))
    ret))

(define-typed-fn read-array ((rsconf-parser parser))
    (t/array)
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (let ((ret (make-array 0 :adjustable t :fill-pointer 0)))
    (declare (type t/array ret))

    (assert (eql (parser-read-char parser) #\[))
    (skip-whitespace parser)

    (loop for c of-type t/char? = (parser-peek-char parser) do
      (cond
        ((or (null c)
             (char= c #\]))
         (parser-read-char parser)
         (maybe-read-comma parser)
         (loop-finish))

        ((whitespace-char-p c t)
         (parser-read-char parser))

        ((char= c #\;)
         (read-comment parser)
         (skip-whitespace parser))

        ((or (char= c #\")
             (char= c #\{)
             (char= c #\})
             (char= c #\[)
             (char= c #\])
             (digit-char-p c 10)
             (char= c #\+)
             (char= c #\-)
             (char= c #\t)
             (char= c #\f)
             (char= c #\n))
         (let ((val (read-value parser)))
           (declare (type (or null t/value) val))
           (if (null val)
               (rsconf-parse-error parser "Unexpected end of stream")
               (vector-push-extend val ret)))
         (skip-whitespace parser))

        (t
         (rsconf-parse-error parser "Expected a value or the end of an array"))))
    ret))

(define-typed-fn read-document ((rsconf-parser parser))
    ((or t/object t/array))
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (let ((ret nil)
        (started-object? nil))
    (declare (type (or null t/object t/array) ret)
             (type boolean started-object?))

    ;; This check was determined by writing raw bytes into a byte stream, then
    ;; converting it to a string, then reading from that string.  Since some of
    ;; these characters wouldn't necessarily be valid outside of a string
    ;; anyway, this sort of check works both for a possible Byte-Order-Mark, as
    ;; well as an initial "hey is the first character a bad one?" for these
    ;; characters.  We must do this before skipping any initial whitespace.
    ;;
    ;; https://en.wikipedia.org/wiki/Byte_order_mark
    (let ((bom-test (parser-peek-char parser)))
      (when (or #-ccl (eql bom-test #\Zero_Width_No-Break_Space)
                #+ccl (= (char-code bom-test) #xFEFF)
                (eql bom-test #\Replacement_Character)
                (eql bom-test #\+)
                (eql bom-test #\So))
        (rsconf-bom-error () "Possible byte-order-mark detected, or junk in toplevel")))

    (skip-whitespace parser)
    (loop for c of-type t/char? = (parser-peek-char parser) do
      (cond
        ((null c)
         (loop-finish))

        ((whitespace-char-p c t)
         (skip-whitespace parser))

        ((char= c #\;)
         (read-comment parser))

        ((char= c #\{)
         (when started-object?
           (rsconf-parse-error parser "Unexpected start of document object"))
         (setf ret (read-object parser))
         (loop-finish))

        ((char= c #\[)
         (when started-object?
           (rsconf-parse-error parser "Unexpected start of document array"))
         (setf ret (read-array parser))
         (loop-finish))

        ((or (char= c #\")
             (alphanumericp c))
         (unless started-object?
           (setf started-object? t)
           (assert (null ret))
           (setf ret (make-hash-table :test 'equal)))

         (assert (hash-table-p ret))
         (let ((key (read-key-name parser))
               (val (read-value parser)))
           (declare (type simple-string key)
                    (type (or null t/value) val))
           (if (null val)
               (rsconf-parse-error parser "Unexpected end of stream")
               (setf (gethash key ret) val))))

        (t
         (rsconf-parse-error parser "Unexpected character at toplevel: '~a' (~s)" c c))))

    (skip-whitespace-and-comments parser)
    (unless (null (parser-peek-char parser))
      (rsconf-parse-error parser "Unexpected junk at toplevel"))
    ret))

(defmethod parse ((source stream))
  "Parses RSConf data from an open stream."
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (unless (or #+(or clisp ccl ecl) (typep source 'string-stream)
              (eql (stream-external-format source) :utf-8)
              (null (stream-external-format source))) ;; This may be null if it's a string stream.
    (rsconf-encoding-error () "Unsupported encoding: ~a" (stream-external-format source)))
  (let ((parser (make-rsconf-parser :stream source)))
    (read-document parser)))

(defmethod parse ((source pathname))
  "Parses RSConf data from a file."
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (with-open-file (in source :direction :input
                             :if-does-not-exist :error
                             :external-format :utf-8) ;; SBCL, Clisp, ClozureCL,
                                                      ;; and ECL all seem to
                                                      ;; support :UTF-8 here
    (parse in)))

(defmethod parse ((source string))
  "Parses RSConf data from a string."
  (declare (optimize speed (debug 1) (safety 1) (compilation-speed 0)))
  (with-input-from-string (in source)
    (parse in)))
