;;;; CL-SDM - Opinionated Extra Batteries for Common Lisp
;;;; Copyright (C) 2021-2025 Remilia Scarlet <remilia@posteo.jp>
;;;; Copyright (C) 2015 Jaime Olivares
;;;; Copyright (c) 2011 Matthew Francis
;;;; Ported from the Java implementation by Matthew Francis:
;;;; https://github.com/MateuszBartosiewicz/bzip2.
;;;;
;;;; Ported by Remilia Scarlet from the C# implementation by Jamie Olivares:
;;;; http://github.com/jaime-olivares/bzip2
;;;;
;;;; This program is free software: you can redistribute it and/or modify it
;;;; under the terms of the GNU Affero General Public License as published by
;;;; the Free Software Foundation, either version 3 of the License, or (at your
;;;; option) any later version.
;;;;
;;;; This program is distributed in the hope that it will be useful, but WITHOUT
;;;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;;;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
;;;; License for more details.
;;;;
;;;; You should have received a copy of the GNU Affero General Public License
;;;; along with this program.  If not, see <https://www.gnu.org/licenses/>.
(in-package :cl-sdm-bzip2)

(defclass mtf-and-rle-2-stage-encoder ()
  ((bwt-block
    :initarg :bwt-block
    :type t/int32-array
    :documentation "The Burrows-Wheeler transformed block.")

   (bwt-length
    :initarg :bwt-length
    :type t/int32
    :documentation "Actual length of the data in the bwtBlock array.")

   (bwt-values-in-use
    :initarg :bwt-values-in-use
    :type (simple-array boolean (*))
    :documentation "At each position, true if the byte value with that index is present within the
block, otherwise false.")

   (mtf-block
    :type t/uint16-array
    :reader mtf-rle-encoder-mtf-block
    :documentation "The output of the Move To Front Transform and Run-Length
Encoding[2] stages.")

   (mtf-symbol-frequencies
    :initform (new-array +max-alphabet-size+ t/int32)
    :type (t/int32-array #.+max-alphabet-size+)
    :reader mtf-rle-encoder-mtf-symbol-frequencies
    :documentation "The global frequencies of values within the MTF-BLOCK array.")

   (mtf-length
    :initform 0
    :type t/int32
    :reader mtf-rle-encoder-mtf-length
    :documentation "Gets The actual length of the MTF block.")

   (mtf-alphabet-size
    :initform 0
    :type t/int32
    :reader mtf-rle-encoder-mtf-alphabet-size
    :documentation "Gets the size of the MTF block's alphabet."))
  (:documentation "An encoder for the BZip2 Move To Front Transform and Run-Length Encoding[2]
stages.

Although conceptually these two stages are separate, it is computationally
efficient to perform them in one pass."))

(defmethod initialize-instance :after ((obj mtf-and-rle-2-stage-encoder) &key &allow-other-keys)
  (check-type (slot-value obj 'bwt-block) t/int32-array)
  (check-type (slot-value obj 'bwt-length) t/int32)
  (check-type (slot-value obj 'bwt-values-in-use) (simple-array boolean (*)))
  (setf (slot-value obj 'mtf-block)
        (new-array (1+ (slot-value obj 'bwt-length)) t/uint16)))

(defgeneric mtf-rle-encoder-encode (encoder)
  (:documentation "Performs the Move To Front transform and Run Length Encoding[1] stages."))

(defmethod mtf-rle-encoder-encode ((encoder mtf-and-rle-2-stage-encoder))
  (declare (optimize speed (debug 1)))
  (with-typed-slots ((t/int32-array bwt-block)
                     (t/int32 bwt-length mtf-length mtf-alphabet-size)
                     ((simple-array boolean (*)) bwt-values-in-use)
                     (t/uint16-array mtf-block)
                     ((t/int32-array #.+max-alphabet-size+) mtf-symbol-frequencies))
      encoder
    (let ((huffman-symbol-map (new-array 256 t/uint8))
          (symbol-mtf (make-move-to-front))
          (total-unique-values 0)
          (end-of-block-symbol 0)
          (mtf-index 0)
          (repeat-count 0)
          (total-run-as 0)
          (total-run-bs 0)
          (mtf-position 0))
      (declare (type t/uint8-array huffman-symbol-map)
               (type t/int32 total-unique-values end-of-block-symbol mtf-index repeat-count
                     total-run-as total-run-bs mtf-position)
               (dynamic-extent huffman-symbol-map))

      (dotimes (i 256)
        (when (aref bwt-values-in-use i)
          (setf (aref huffman-symbol-map i) (coerce-to-uint8 total-unique-values))
          (incf total-unique-values)))
      (setf end-of-block-symbol (1+ total-unique-values))

      (dotimes (i bwt-length)
        ;; Move-To-Front
        (setf mtf-position (mtf-value-to-front symbol-mtf (aref huffman-symbol-map (logand (aref bwt-block i) #xFF))))

        ;; Run-length encoding
        (cond
          ((zerop mtf-position)
           (incf repeat-count))
          (t
           (when (plusp repeat-count)
             (decf repeat-count)
             (loop do
               (cond
                 ((not (flag? repeat-count 1))
                  (setf (aref mtf-block mtf-index) +rle-symbol-run-a+)
                  (incf mtf-index)
                  (incf total-run-as))
                 (t
                  (setf (aref mtf-block mtf-index) +rle-symbol-run-b+)
                  (incf mtf-index)
                  (incf total-run-bs)))
               (when (<= repeat-count 1)
                 (loop-finish))
               (setf repeat-count (ash (- repeat-count 2) -1)))

             (setf repeat-count 0))

           (setf (aref mtf-block mtf-index) (coerce-to-uint16 (1+ mtf-position)))
           (incf mtf-index)
           (incf (aref mtf-symbol-frequencies (1+ mtf-position))))))

      (when (plusp repeat-count)
        (decf repeat-count)
        (loop do
          (cond
            ((not (flag? repeat-count 1))
             (setf (aref mtf-block mtf-index) +rle-symbol-run-a+)
             (incf mtf-index)
             (incf total-run-as))
            (t
             (setf (aref mtf-block mtf-index) +rle-symbol-run-b+)
             (incf mtf-index)
             (incf total-run-bs)))
          (when (<= repeat-count 1)
            (loop-finish))
          (setf repeat-count (ash (- repeat-count 2) -1))))

      (setf (aref mtf-block mtf-index) (coerce-to-uint16 end-of-block-symbol))

      (incf (aref mtf-symbol-frequencies end-of-block-symbol))
      (incf (aref mtf-symbol-frequencies +rle-symbol-run-a+) total-run-as)
      (incf (aref mtf-symbol-frequencies +rle-symbol-run-b+) total-run-bs)

      (setf mtf-length (1+ mtf-index))
      (setf mtf-alphabet-size (1+ end-of-block-symbol))))
  nil)
