;;;; CL-SDM - Opinionated Extra Batteries for Common Lisp
;;;; Copyright (C) 2021-2025 Remilia Scarlet <remilia@posteo.jp>
;;;; HJSON code based on hjson-go, Copyright (C) 2016, 2017 Christian Zangl
;;;;
;;;; This program is free software: you can redistribute it and/or modify it
;;;; under the terms of the GNU Affero General Public License as published by
;;;; the Free Software Foundation, either version 3 of the License, or (at your
;;;; option) any later version.
;;;;
;;;; This program is distributed in the hope that it will be useful, but WITHOUT
;;;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;;;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
;;;; License for more details.
;;;;
;;;; You should have received a copy of the GNU Affero General Public License
;;;; along with this program.  If not, see <https://www.gnu.org/licenses/>.
(in-package :cl-sdm-hjson)

;;;;
;;;; HJSON Decoding
;;;;

(defstruct (parser (:copier nil))
  (data        ""                     :type simple-string)
  (at          0                      :type fixnum)
  (ch          #\Nul                  :type character)
  (bool-conv   #'bool-identity        :type function)
  (key-find-fn #'sdm:caseless-string= :type function))

(define-typed-fn parser-len ((parser parser))
    (fixnum t)
  (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0)))
  (length (parser-data parser)))

(define-typed-fn parser-nth ((parser parser) (fixnum idx))
    (character t)
  (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0)))
  (char (parser-data parser) idx))

(define-condition decode-error (hjson-error line-col-error)
  ((offending-line
    :initarg :offending-line
    :initform "(no offending line)"
    :reader error-offending-line))
  (:documentation "Represents an error that occurred during HJSON parsing."))

(define-typed-fn error-at ((parser parser) (string msg) &rest fmt-args)
    (null)
  (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0)))
  (let ((col 0) (line 1) (sam-end 0))
    (declare (type fixnum col line sam-end))

    (loop for i fixnum from (1- (parser-at parser)) above 0
          while (not (char= (parser-nth parser i) #\Newline))
          do (incf col)
          finally (loop for j fixnum from i above 0
                        do (when (char= (parser-nth parser j) #\newline)
                             (incf line))))

    (setf sam-end (+ (- (parser-at parser) col) 20))
    (when (> sam-end (parser-len parser))
      (setf sam-end (parser-len parser)))

    (error 'decode-error
           :format-arguments fmt-args
           :format-control msg
           :line line
           :column col
           :offending-line (subseq (parser-data parser)
                                   (- (parser-at parser) col)
                                   sam-end))))

(defgeneric assign-object-slots (parser obj data))

(defun bool-identity (val)
  "When VAL is non-NIL, this returns :TRUE, otherwise it returns :FALSE."
  (if val
      :true
      :false))

(define-typed-fn string-stream-len ((string-stream stream))
    (fixnum t)
  (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0)))
  (let ((data (get-output-stream-string stream)))
    (cl:write-string data stream)
    (length data)))

(define-typed-fn string-stream-peek ((string-stream stream))
    (simple-string t)
  (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0)))
  (let ((data (get-output-stream-string stream)))
    (cl:write-string data stream)
    data))

(define-typed-fn parser-reset-at ((parser parser))
    (null t)
  (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0)))
  (setf (parser-at parser) 0)
  (setf (parser-ch parser) #\Space)
  nil)

(define-typed-fn punctuator-char-p ((character char))
    (boolean t)
  (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0)))
  (or (char= char #\{)
      (char= char #\})
      (char= char #\[)
      (char= char #\])
      (char= char #\,)
      (char= char #\:)))

(define-typed-fn parser-next ((parser parser))
    (character t)
  (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0)))
  (cond
    ((< (parser-at parser) (parser-len parser))
     (prog1 (setf (parser-ch parser) (parser-nth parser (parser-at parser)))
       (incf (parser-at parser))))

    (t
     (setf (parser-ch parser) #\Nul))))

(define-typed-fn parser-peek ((parser parser) (fixnum offset))
    (character t)
  (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0)))

  (let ((pos (+ (parser-at parser) offset)))
    (declare (type fixnum pos))

    (if (and (>= pos 0) (< pos (parser-len parser)))
        (parser-nth parser (+ (parser-at parser) offset))
        #\Nul)))

(declaim (type list +escapee+))
(defconst +escapee+
  '((#\" . #\")
    (#\' . #\')
    (#\\ . #\\)
    (#\/ . #\/)
    (#\b . #\Bel)
    (#\f . #\Page)
    (#\n . #\Newline)
    (#\r . #\Return)
    (#\t . #\Tab)))

;; Parse a HJSON value.  It could be an object, an array, string, number, or
;; word.
(define-typed-fn parser-read-value ((parser parser))
    ((or hash-table vector list simple-string) t)
  (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0)))

  (parser-white parser)
  (case (parser-ch parser)
    (#\{ (parser-read-object parser nil))
    (#\[ (parser-read-vector parser))
    (#\" (parser-read-string parser t))
    (#\' (parser-read-string parser t))
    (otherwise (parser-read-tfnns parser))))

(define-typed-fn parser-read-ml-string ((parser parser))
    (simple-string)
  (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0)))

  (let ((triple 0)
        (indent 0) ;; We are at ''' +1 - get indent
        (out (make-string-output-stream)))
    (declare (type fixnum triple indent)
             (type string-stream out))

    (loop for c character = (parser-peek parser (- (* -1 indent) 5))
          if (or (char= c #\Nul) (char= c #\Newline))
            do (loop-finish)
          else do (incf indent))

    (flet ((skip-indent ()
             (loop for skip fixnum from indent above 0
                   while (and
                          (char> (parser-ch parser) #\Nul)
                          (char<= (parser-ch parser) #\Space)
                          (not (char= (parser-ch parser) #\Newline)))
                   do (parser-next parser))
             nil))
      (declare (inline skip-indent))

      ;; Skip white/to (newline)
      (loop while (and (char> (parser-ch parser) #\Nul)
                       (char<= (parser-ch parser) #\Space)
                       (not (char= (parser-ch parser) #\Newline)))
            do (parser-next parser))

      (when (char= (parser-ch parser) #\Newline)
        (parser-next parser)
        (skip-indent))

      (loop with last-lf? = nil do
        (tagbody
           (cond
             ((char= (parser-ch parser) #\Nul)
              (error-at parser "Bad multiline string"))

             ((char= (parser-ch parser) #\')
              (incf triple)
              (parser-next parser)

              (when (= triple 3)
                (let ((sres (get-output-stream-string out)))
                  (if last-lf?
                      (return-from parser-read-ml-string (subseq sres 0 (1- (length sres))))
                      (return-from parser-read-ml-string sres))))
              (go ==loop-bottom==))

             (t
              (dotimes (i triple)
                (write-char #\' out))
              (setf triple 0)
              (setf last-lf? nil)))

           (cond
             ((char= (parser-ch parser) #\Newline)
              (write-char #\Newline out)
              (setf last-lf? t)
              (parser-next parser)
              (skip-indent))

             (t
              (unless (char= (parser-ch parser) #\Return)
                (write-char (parser-ch parser) out)
                (setf last-lf? nil))
              (parser-next parser)))

         ==loop-bottom==)))))

(define-typed-fn parser-read-string ((parser parser) (boolean allow-ml))
    (simple-string)
  (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0)))

  ;; callers make sure that (ch === '"' || ch === "'")
  ;; When parsing for string values, we must look for " and \
  ;; characters.
  (let ((exit-ch (parser-ch parser))
        (out (make-string-output-stream)))
    (loop while (parser-next parser) do
      (cond
        ((char= (parser-ch parser) exit-ch)
         (parser-next parser)

         ;; Check for multiline string
         (cond
           ((and allow-ml
                 (char= exit-ch #\')
                 (char= (parser-ch parser) #\')
                 (= (string-stream-len out) 0))
            (parser-next parser)
            (return-from parser-read-string (parser-read-ml-string parser)))

           ;; Not multiline
           (t (return-from parser-read-string (get-output-stream-string out)))))

        ((char= (parser-ch parser) #\\)
         (parser-next parser)
         (cond
           ((char= (parser-ch parser) #\u)
            (let ((uffff 0))
              (declare (type fixnum uffff))

              (dotimes (i 4)
                (let ((hex 0))
                  (declare (type fixnum hex))
                  (parser-next parser)

                  (cond
                    ((and (char>= (parser-ch parser) #\0)
                          (char<= (parser-ch parser) #\9))
                     (setf hex (- (char-code (parser-ch parser))
                                  (char-code #\0))))

                    ((and (char>= (parser-ch parser) #\a)
                          (char<= (parser-ch parser) #\f))
                     (setf hex (+ (- (char-code (parser-ch parser))
                                     (char-code #\a))
                                  #xa)))

                    ((and (char>= (parser-ch parser) #\A)
                          (char<= (parser-ch parser) #\F))
                     (setf hex (+ (- (char-code (parser-ch parser))
                                     (char-code #\A))
                                  #xa)))

                    (t (error-at parser "Bad \\u char '~a'" (parser-ch parser))))

                  (setf uffff (+ (the fixnum (* uffff 16)) hex))))

              (write-char (code-char uffff) out)))

           ((assoc (parser-ch parser) +escapee+ :test #'char=)
            (write-char (cdr (assoc (parser-ch parser) +escapee+ :test #'char=))
                        out))

           (t (error-at parser "Bad escape \\~a" (parser-ch parser)))))

        ((or (char= (parser-ch parser) #\Newline)
             (char= (parser-ch parser) #\Return))
         (error-at parser "Bad string containing newline"))

        (t (write-char (parser-ch parser) out))))))

(define-typed-fn parser-read-key-name ((parser parser))
    (simple-string)
  (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0)))

  ;; Quotes for keys are optional in HJSON, unless they include {}[],:
  ;; or whitespace
  (when (or (char= (parser-ch parser) #\")
            (char= (parser-ch parser) #\'))
    (return-from parser-read-key-name (parser-read-string parser nil)))

  (with-output-to-string (out)
    (let ((start (parser-at parser))
          (space -1))

      (loop do
        (cond
          ((char= (parser-ch parser) #\:)
           (cond
             ((= (string-stream-len out) 0)
              (error-at
               parser "Found ':' but no key name (for an empty name, use quotes)"))

             ((and (> space 0) (not (= space (string-stream-len out))))
              (setf (parser-at parser) (+ start space))
              (error-at parser
                        "Found whitespace in your key name (use quotes to include)"))

             (t (loop-finish))))

          ((char<= (parser-ch parser) #\Space)
           (when (char= (parser-ch parser) #\Nul)
             (error-at parser
                       "Found EOF while looking for a key name (check your syntax)"))

           (when (< space 0)
             (setf space (string-stream-len out))))

          (t
           (when (punctuator-char-p (parser-ch parser))
             (error-at parser "Found '~a' where a key name was expected (check your syntax or use quotes if the ~
key name includes {}[],: or whitespace)"
                       (parser-ch parser)))

           (write-char (parser-ch parser) out)))
        (parser-next parser)))))

(define-typed-fn parser-white ((parser parser))
    (null)
  (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0)))

  (loop while (char> (parser-ch parser) #\Nul) do
    ;; Skip whitespace
    (loop while (and (char> (parser-ch parser) #\Nul)
                     (char<= (parser-ch parser) #\Space))
          do (parser-next parser))

    ;; HJSON allows comments
    (cond
      ;; // style comments
      ((or (char= (parser-ch parser) #\#)
           (and (char= (parser-ch parser) #\/)
                (char= (parser-peek parser 0) #\/)))
       (loop while (and (char> (parser-ch parser) #\Nul)
                        (not (char= (parser-ch parser) #\Newline)))
             do (parser-next parser)))

      ;; /* */ style comments
      ((and (char= (parser-ch parser) #\/)
            (char= (parser-peek parser 0) #\*))
       (parser-next parser)
       (parser-next parser)

       (loop while (and (char> (parser-ch parser) #\Nul)
                        (not (and (char= (parser-ch parser) #\*)
                                  (char= (parser-peek parser 0) #\/))))
             do (parser-next parser))

       (when (char> (parser-ch parser) #\Nul)
         (parser-next parser)
         (parser-next parser)))

      ;; No comment
      (t (return-from parser-white))))
  nil)

;; HJSON strings can be quoteless.  This returns a string, a boolean
;; converted according to the parser's BOOL-CONV function, or nil.
(define-typed-fn parser-read-tfnns ((parser parser))
    (simple-string)
  (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0)))

  (when (punctuator-char-p (parser-ch parser))
    (error-at parser
              "Found a punctuator character '~a' when expecting a quoteless string (check your syntax)"
              (parser-ch parser)))

  (sdm:trim-whitespace
   (with-output-to-string (out)
     (let ((chf (parser-ch parser))
           (eol? nil))
       (write-char (parser-ch parser) out)

       (loop do
         (parser-next parser)

         (setf eol? (or (char= (parser-ch parser) #\Return)
                        (char= (parser-ch parser) #\Newline)
                        (char= (parser-ch parser) #\Nul)))

         (when (or eol?
                   (char= (parser-ch parser) #\,)
                   (char= (parser-ch parser) #\})
                   (char= (parser-ch parser) #\])
                   (char= (parser-ch parser) #\#)

                   (and (char= (parser-ch parser) #\/)
                        (or (char= (parser-peek parser 0) #\/)
                            (char= (parser-peek parser 0) #\*))))

           (case chf
             (#\f
              (when (locally
                        (declare #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
                      (string= (sdm:trim-whitespace (string-stream-peek out)) "false"))
                (return-from parser-read-tfnns
                  (funcall (parser-bool-conv parser) nil))))

             (#\n
              (when (locally
                        (declare #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
                      (string= (sdm:trim-whitespace (string-stream-peek out)) "null"))
                (return-from parser-read-tfnns nil)))

             (#\t
              (when (locally
                        (declare #+sbcl (sb-ext:muffle-conditions sb-ext:compiler-note))
                      (string= (sdm:trim-whitespace (string-stream-peek out)) "true"))
                (return-from parser-read-tfnns
                  (funcall (parser-bool-conv parser) t))))

             (otherwise
              (when (or (char= chf #\-)
                        (and (char>= chf #\0)
                             (char<= chf #\9)))
                (handler-case
                    (return-from parser-read-tfnns
                      (try-parse-number (string-stream-peek out)))
                  (hjson-error ())))))

           (when eol?
             (loop-finish)))

         (write-char (parser-ch parser) out))))))

(define-typed-fn parser-read-vector ((parser parser))
    (vector)
  (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0)))

  (let ((ret (make-array 0 :adjustable t :fill-pointer 0))
        (val nil))
    (declare (type vector ret))

    (parser-next parser)
    (parser-white parser)

    ;; Check for empty list
    (when (char= (parser-ch parser) #\])
      (parser-next parser)
      (return-from parser-read-vector ret))

    (loop while (char> (parser-ch parser) #\Nul) do
      (setf val (parser-read-value parser))
      (vector-push-extend val ret)
      (parser-white parser)

      ;; In HJSON, the comma is optional and trailing commas are allowed
      (when (char= (parser-ch parser) #\,)
        (parser-next parser)
        (parser-white parser))

      (when (char= (parser-ch parser) #\])
        (parser-next parser)
        (return-from parser-read-vector ret))
      (parser-white parser)))

  (error-at parser
            "End of input while parsing an array (did you forget a closing ']'?)")
  (error "Unexpectedly reached point after error-at"))

(define-typed-fn parser-read-object ((parser parser) (boolean without-braces))
    (hash-table)
  (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0)))

  (let ((ret (make-hash-table :test 'equal))
        (key nil))
    (unless without-braces
      ;; Assuming the next character is '{'...
      (parser-next parser))

    (parser-white parser)
    (when (and (char= (parser-ch parser) #\}) (not without-braces))
      (parser-next parser)
      (return-from parser-read-object ret))

    (loop while (char> (parser-ch parser) #\Nul) do
      (setf key (parser-read-key-name parser))
      (parser-white parser)

      (unless (char= (parser-ch parser) #\:)
        (error-at parser "Expected ':' instead of '~a'" (parser-ch parser)))

      (parser-next parser)

      ;; Duplicate keys overwrite the previous value
      (setf (gethash key ret) (parser-read-value parser))
      (parser-white parser)

      ;; In HJSON, the comma is optional and trailing commas are
      ;; allowed
      (when (char= (parser-ch parser) #\,)
        (parser-next parser)
        (parser-white parser))

      (when (and (char= (parser-ch parser) #\}) (not without-braces))
        (parser-next parser)
        (return-from parser-read-object ret))

      (parser-white parser))

    (unless without-braces
      (error-at parser
                "End of input while parsing an object (did you forget a closing '}'?)"))

    ret))

(define-typed-fn parser-check-trailing ((parser parser) val)
    (T t)
  (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0)))
  (parser-white parser)
  (when (char> (parser-ch parser) #\Nul)
    (error-at parser "Syntax error, found trailing characters"))
  val)

;; Braces for the root object are optional
(define-typed-fn parser-root-value ((parser parser))
    (T)
  (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0)))

  (parser-white parser)
  (case (parser-ch parser)
    (#\{
     (return-from parser-root-value
       (parser-check-trailing parser (parser-read-object parser nil))))
    (#\[
     (return-from parser-root-value
       (parser-check-trailing parser (parser-read-vector parser)))))

  (let ((ret nil))
    (handler-case
        ;; Assume we have a root without braces
        (setf ret (parser-check-trailing parser (parser-read-object parser t)))
      (hjson-error ()
        ;; test if we are dealing with a single JSON value instead
        ;; (true/false/null/num/"")
        (parser-reset-at parser)
        (handler-case
            (return-from parser-root-value
              (parser-check-trailing parser (parser-read-value parser)))
          (hjson-error ()))))
    ret))

(defgeneric parse-hjson (source))

(defmethod parse-hjson ((data string))
  "Parses the HJSON data DATA, then unmarshals the result and returns
  the raw unmarshaled data as a hash table.  This will consist of one
  or more of the following:

  * More hash table
  * Vectors
  * Strings
  * Numbers"
  (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0)))
  (let ((parser (make-parser :data data)))
    (parser-reset-at parser)
    (parser-root-value parser)))

(defmethod parse-hjson ((data pathname))
  (parse-hjson (sdm:read-file-to-end data :return-type :string)))
