;;;; CL-SDM - Opinionated Extra Batteries for Common Lisp
;;;; Copyright (C) 2021-2025 Remilia Scarlet <remilia@posteo.jp>
;;;; Copyright (C) 2015 Jaime Olivares
;;;; Copyright (c) 2011 Matthew Francis
;;;; Ported from the Java implementation by Matthew Francis:
;;;; https://github.com/MateuszBartosiewicz/bzip2.
;;;;
;;;; Ported by Remilia Scarlet from the C# implementation by Jamie Olivares:
;;;; http://github.com/jaime-olivares/bzip2
;;;;
;;;; This program is free software: you can redistribute it and/or modify it
;;;; under the terms of the GNU Affero General Public License as published by
;;;; the Free Software Foundation, either version 3 of the License, or (at your
;;;; option) any later version.
;;;;
;;;; This program is distributed in the hope that it will be useful, but WITHOUT
;;;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;;;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
;;;; License for more details.
;;;;
;;;; You should have received a copy of the GNU Affero General Public License
;;;; along with this program.  If not, see <https://www.gnu.org/licenses/>.
(in-package :cl-sdm-bzip2)

(defclass bzip2-reader (trivial-gray-streams:fundamental-binary-input-stream)
  ((sync-close-p
    :initform nil
    :type boolean
    :accessor bzip2-reader-sync-close-p
    :documentation "When T, then closing this BZIP2-READER will also close the
underlying STREAM.  Otherwise when this is NIL, the underlying stream will stay
open when this BZIP2-READER is closed.")

   (closed-p
    :initform nil
    :type boolean
    :documentation "Whether or not this instance is closed.")

   (io
    :type stream
    :documentation "The underlying STREAM.")

   (block-stream
    :initform nil
    :type (or null block-decompressor)
    :documentation "The actual block decompression stream.")

   (bit-stream
    :type bit-reader
    :documentation "A BIT-READER that is used to read underlying stream data.")

   (stream-block-size
    :initform 0
    :type t/int32
    :documentation "The size of each block in the decompression stream.")

   (stream-crc
    :initform 0
    :type t/uint32
    :documentation "CRC32 of the underlying stream data.")

   (at-end?
    :initform nil
    :type boolean
    :documentation "Whether or not the stream is at the end of the BZip2 data."))
  (:documentation "A BZip2 decompressor that reads from a STREAM.  This
implements binary Gray Streams reading methods."))

(defun make-bzip2-reader (stream &optional sync-close?)
  "Creates a new BZIP2-READER that will read compressed data from STREAM and
decompress it.  If SYNC-CLOSE is truthy, then closing the BZIP2-READER will also
close STREAM automatically."
  (let ((ret (make-instance 'bzip2-reader)))
    (setf (slot-value ret 'io) stream)
    (setf (slot-value ret 'bit-stream) (make-bit-reader stream))
    (setf (slot-value ret 'sync-close-p) (if sync-close? t nil))
    ret))

(defmacro with-bzip2-reader ((reader stream &optional sync-close?) &body forms)
  "Creates a new BZIP2-READER instance and binds it to READER.  This instance will
read compressed data from STREAM and decompress it.  After FORMS is executed,
READER is automatically closed.  If SYNC-CLOSE is truthy, then STREAM will be
closed as well."
  `(let ((,reader (make-bzip2-reader ,stream ,sync-close?)))
     (unwind-protect
          (progn ,@forms)
       (cl:close ,reader))))

(defmacro with-open-bzip2-reader ((reader filespec) &body forms)
  "Opens the file at FILESPEC for reading, then binds a new BZIP2-READER instance
to READER.  This BZIP2-READER will decompress data from the file.  After FORMS
are executed, the file and READER are both closed.  The file must already exist."
  (with-gensyms (file)
    `(with-open-file (,file ,filespec :direction :input :element-type '(unsigned-byte 8)
                                      :if-does-not-exist :error)
       (with-bzip2-reader (,reader ,file)
         ,@forms))))

(define-typed-fn reader-init-stream ((bzip2-reader reader))
    (null)
  "Initializes information about the underlying compressed data stream (e.g. block
size).

Do not call this from within the constructor."
  (with-typed-slots ((bit-reader bit-stream)
                     (t/int32 stream-block-size))
      reader
    (let* ((marker-1 (bit-reader-read bit-stream 16))
           (marker-2 (bit-reader-read bit-stream 8))
           (block-size (- (the fixnum (bit-reader-read bit-stream 8)) #.(char-code #\0))))
      (when (or (/= marker-1 +stream-start-marker-1+)
                (/= marker-2 +stream-start-marker-2+)
                (< block-size +min-compression-level+)
                (> block-size +max-compression-level+))
        (error 'bzip2-error :format-control "Invalid BZip2 header"))

      (setf stream-block-size (* block-size 100000))))
  nil)

(define-typed-fn reader-init-next-block ((bzip2-reader reader))
    (boolean)
  "Reads the next block so that it's ready for decompression.

Do not call this from within the constructor."
  (with-typed-slots ((boolean at-end?)
                     ((or null block-decompressor) (stream block-stream))
                     (t/uint32 stream-crc)
                     (bit-reader bit-stream)
                     (t/int32 stream-block-size))
      reader
    (when at-end? (return-from reader-init-next-block))

    ;; If a block is complete, check the block CRC and integrate it into the
    ;; stream CRC.
    (when stream
      (let ((block-crc (blk-dec-check-crc stream)))
        (setf stream-crc (logxor (logior (ash stream-crc 1) (ash stream-crc -31)) block-crc))))

    ;; Read block-header or end-of-stream marker.
    (let* ((marker-1 (bit-reader-read bit-stream 24))
           (marker-2 (bit-reader-read bit-stream 24)))
      (cond
        ((and (= marker-1 +block-header-marker-1+)
              (= marker-2 +block-header-marker-2+))
         ;; Initialize a new block, then return.
         (handler-bind
             ((error (lambda (err)
                       (setf at-end? t)
                       err)))
           (setf stream (make-block-decompressor bit-stream stream-block-size)))
         t)

        ((and (= marker-1 +stream-end-marker-1+)
              (= marker-2 +stream-end-marker-2+))
         ;; End of stream, verify the CRC
         (setf at-end? t)
         (let ((combined-crc (coerce-to-uint32 (bit-reader-read bit-stream 32))))
           (unless (= combined-crc stream-crc)
             (error 'bzip2-error :format-control "BZip2 stream CRC error")))
         nil)

        (t
         ;; If what was read is not a valid block-header or end-of-stream
         ;; marker, the stream is broken.
         (setf at-end? t)
         (error 'bzip2-error :format-control "BZip2 stream format error"))))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; Gray Streams Implemention
;;;

(defmethod cl:open-stream-p ((stream bzip2-reader))
  (declare (optimize speed (debug 1)))
  (with-slots (closed-p) stream
    (not closed-p)))

(defmacro ensuring-open-stream ((stream) &body forms)
  `(if (open-stream-p ,stream)
       (progn ,@forms)
       (error "Stream is closed")))

(defmethod cl:output-stream-p ((stream bzip2-reader))
  t)

(defmethod cl:input-stream-p ((stream bzip2-reader))
  nil)

(defmethod cl:stream-element-type ((stream bzip2-reader))
  '(unsigned-byte 8))

(defmethod cl:close ((stream bzip2-reader) &key abort)
  (declare (optimize speed (debug 1)))
  (declare (ignore abort))
  (when (cl:open-stream-p stream)
    (setf (slot-value stream 'closed-p) t)
    (when (slot-value stream 'sync-close-p)
      (close (slot-value stream 'io)))
    t))

(defmethod trivial-gray-streams:stream-listen ((stream bzip2-reader))
  (declare (optimize speed (debug 0)))
  (with-typed-slots ((boolean at-end?))
      stream
    (not at-end?)))

(defmethod trivial-gray-streams:stream-file-position ((stream bzip2-reader))
  (error "Not implemented for BZIP2-READERs"))

(defmethod (setf trivial-gray-streams:stream-file-position)
    (new-offset (stream bzip2-reader))
  (declare (ignore new-offset))
  (error "Not implemented for BZIP2-READERs"))

(defmethod trivial-gray-streams:stream-read-byte ((reader bzip2-reader))
  (with-typed-slots ((boolean at-end?)
                     ((or null block-decompressor) (stream block-stream)))
      reader
    (when at-end?
      (return-from trivial-gray-streams:stream-read-byte :eof))
    (let ((byte-read -1))
      ;; Is the underlying block decompressor (STREAM) initialized and ready to
      ;; go?
      (if stream
          ;; Decompress
          (setf byte-read (blk-dec-read-byte stream))

          ;; Initialize decompressor.
          (reader-init-stream reader))

      ;; Did we read a byte?
      (cond
        ((= byte-read -1)
         ;; Nope.  Try initializing the next block and then re-reading.
         (when (reader-init-next-block reader)
           (setf byte-read (blk-dec-read-byte stream)))

         ;; Still no?
         (if (= byte-read -1)
             :eof
             byte-read))

        ;; We did, just return.
        (t byte-read)))))

(defmethod trivial-gray-streams:stream-read-sequence
    ((reader bzip2-reader) sequence start end &key)
  (check-type start fixnum)
  (check-type end (or null fixnum))
  (check-type sequence (vector (unsigned-byte 8)))

  (with-typed-slots ((boolean at-end?)
                     ((or null block-decompressor) (stream block-stream)))
      reader
    ;; Check that START, END, and length of SEQUENCE are all good to go.
    (cond
      ((minusp start)
       (error "START must be >= zero"))
      ((and (integerp end)
            (> start end))
       (error "START must be less than END"))
      ((or (= (length sequence) 0)
           at-end?
           (eq end 0)
           (eq start end))
       ;; Nothing to read.
       (return-from trivial-gray-streams:stream-read-sequence 0)))

    ;; Do the reading.  We'll use a displaced array for this.
    (let* (;; Number of bytes read so far.
           (bytes-read 0)

           ;; How many bytes we actually want to read.
           (buf-len (- (if end
                           (min (length sequence) end)
                           (length sequence))
                       start))

           ;; Displaced array for reading data into SEQUENCE.
           (buf (make-array buf-len :element-type 't/uint8
                                    :displaced-to sequence
                                    :displaced-index-offset start)))
      (declare (type fixnum bytes-read buf-len)
               (type (vector t/uint8 *) buf))
      ;; Is the underlying block decompressor (STREAM) initialized and ready to
      ;; go?
      (if stream
          ;; Decompress bytes into BUF.
          (setf bytes-read (blk-dec-read stream buf))

          ;; Initialize the underlying stream.
          (reader-init-stream reader))

      ;; Check to see if we've actually read enough.
      (cond
        ((<= bytes-read 0)
         ;; We didn't read enough.  Initialize the next block and decompress
         ;; into BUF.  Then return the number of bytes read.
         (when (reader-init-next-block reader)
           (setf bytes-read (blk-dec-read stream buf)))
         (max bytes-read 0))

        ;; We've read enough.
        (t bytes-read)))))

(defmethod trivial-gray-streams:stream-read-char ((stream bzip2-reader))
  (declare (optimize speed (debug 1)))
  (let ((byte (trivial-gray-streams:stream-read-byte stream)))
    (if (eq byte :eof)
        :eof
        (code-char byte))))

(defmethod trivial-gray-streams:stream-read-line ((stream bzip2-reader))
  (declare (optimize speed (debug 1)))
  (values
   (with-output-to-string (out)
     (loop for char = (trivial-gray-streams:stream-read-char stream) do
       (case char
         (:eof
          (return-from trivial-gray-streams:stream-read-line
            (values (get-output-stream-string out) t)))
         (otherwise
          (if (not (char= char #\Newline))
              (write-char char out)
              (loop-finish))))))
   nil))
