;;;; CL-SDM - Opinionated Extra Batteries for Common Lisp
;;;; Copyright (C) 2021-2025 Remilia Scarlet <remilia@posteo.jp>
;;;; Copyright (C) 2015 Jaime Olivares
;;;; Copyright (c) 2011 Matthew Francis
;;;; Ported from the Java implementation by Matthew Francis:
;;;; https://github.com/MateuszBartosiewicz/bzip2.
;;;;
;;;; Ported by Remilia Scarlet from the C# implementation by Jamie Olivares:
;;;; http://github.com/jaime-olivares/bzip2
;;;;
;;;; This program is free software: you can redistribute it and/or modify it
;;;; under the terms of the GNU Affero General Public License as published by
;;;; the Free Software Foundation, either version 3 of the License, or (at your
;;;; option) any later version.
;;;;
;;;; This program is distributed in the hope that it will be useful, but WITHOUT
;;;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;;;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
;;;; License for more details.
;;;;
;;;; You should have received a copy of the GNU Affero General Public License
;;;; along with this program.  If not, see <https://www.gnu.org/licenses/>.
(in-package :cl-sdm-bzip2)

(deftype t/code-bases-table ()
  '(simple-array t/int32 (#.+maximum-tables+ #.(+ +max-code-length+ 2))))

(deftype t/code-limits-table ()
  '(simple-array t/int32 (#.+maximum-tables+ #.(+ +max-code-length+ 1))))

(deftype t/code-symbols-table ()
  '(simple-array t/int32 (#.+maximum-tables+ #.+max-alphabet-size+)))

(defstruct (huffman-stage-decoder (:constructor %make-huffman-stage-decoder)
                                  (:conc-name huff-dec-))
  (reader nil :type (or null bit-reader))
  (selectors nil :type (or null t/uint8-array))
  (minimum-lengths (new-array +maximum-tables+ t/int32)
   :type (simple-array t/int32 (#.+maximum-tables+)))

  (code-bases (make-array (list #.+maximum-tables+ #.(+ +max-code-length+ 2))
                          :element-type 't/int32 :initial-element 0)
   :type t/code-bases-table)

  (code-limits (make-array (list #.+maximum-tables+ #.(+ +max-code-length+ 1))
                           :element-type 't/int32 :initial-element 0)
   :type t/code-limits-table)

  (code-symbols (make-array (list #.+maximum-tables+ #.+max-alphabet-size+)
                            :element-type 't/int32 :initial-element 0)
   :type t/code-symbols-table)

  (current-table 0 :type t/uint8)
  (group-index -1 :type t/int32)
  (group-position -1 :type t/int32))

(define-typed-fn make-huffman-stage-decoder ((bit-reader reader)
                                             (t/int32 alphabet-size)
                                             (t/uint8-array-array table-code-lengths)
                                             (t/uint8-array selectors))
    (huffman-stage-decoder t)
  (let ((ret (%make-huffman-stage-decoder :reader reader
                                          :selectors selectors
                                          :current-table (aref selectors 0))))
    (huff-dec-create-decoding-table ret alphabet-size table-code-lengths)
    ret))

(define-typed-fn huff-dec-next-symbol ((huffman-stage-decoder dec))
    (t/int32)
  (declare (optimize (speed 3) (debug 1) (safety 0) (compilation-speed 0)))
  (with-typed-slots ((t/int32 group-index group-position)
                     (t/uint8 current-table)
                     (t/uint8-array selectors)
                     (t/int32-array minimum-lengths)
                     (t/code-limits-table code-limits)
                     (t/code-symbols-table code-symbols)
                     (t/code-bases-table code-bases)
                     (bit-reader reader))
      dec

    ;; Move to next group selector if required.
    (incf group-position)
    (when (= (mod group-position +group-run-length+) 0)
      (incf group-index)
      (when (>= group-index (length selectors))
        (error 'bzip2-error :format-control "Error decoding BZip2 block"))
      (setf current-table (aref selectors group-index)))

    (let* ((code-len (aref minimum-lengths current-table))
           (code-bits (coerce-to-uint32 (bit-reader-read reader code-len))))
      (declare (type t/int32 code-len)
               (type t/uint32 code-bits))

      ;; Starting with the minimum bit length for the table, read additional
      ;; bits one at a time until a complete code is recognised.
      (loop while (<= code-len +max-code-length+) do
        (when (<= code-bits (aref code-limits current-table code-len))
          ;; Convert the code to a symbol index and return.
          (return-from huff-dec-next-symbol
            (aref code-symbols current-table (- code-bits (aref code-bases current-table code-len)))))

        (setf code-bits (logior (coerce-to-uint32 (ash code-bits 1))
                                (the t/uint8 (bit-reader-read reader 1))))
        (incf code-len))))

  (error 'bzip2-error :format-control "Error decoding BZip2 block"))

(define-typed-fn huff-dec-create-decoding-table ((huffman-stage-decoder dec)
                                                 (t/int32 alphabet-size)
                                                 (t/uint8-array-array  table-code-lengths))
    (null)
  (let ((max-len 0)
        (min-len 0)
        (code 0)
        (base1 0)
        (code-index 0))
    (declare (type t/int32 max-len min-len code base1 code-index))

    (loop for table fixnum from 0 below (array-dimension table-code-lengths 0) do
      (setf min-len #.(1- (expt 2 31)))
      (setf max-len #.(- (expt 2 31)))

      ;; Find the minimum and maximum code length for the table.
      (loop for i fixnum from 0 below alphabet-size do
        (setf max-len (max (aref table-code-lengths table i) max-len))
        (setf min-len (min (aref table-code-lengths table i) min-len)))

      (setf (aref (huff-dec-minimum-lengths dec) table) min-len)

      ;; Calculate the first output symbol for each code length.
      (loop for i fixnum from 0 below alphabet-size do
        (incf (aref (huff-dec-code-bases dec)
                    table
                    (1+ (aref table-code-lengths table i)))))

      (loop for i fixnum from 1 below (+ +max-code-length+ 2) do
        (incf (aref (huff-dec-code-bases dec) table i)
              (aref (huff-dec-code-bases dec) table (1- i))))

      ;; Calculate the first and last Huffman code for each code length (codes
      ;; at a given length are sequential in value).
      (setf code 0)
      (loop for i fixnum from min-len to max-len do
        (setf base1 code)
        (incf code (- (aref (huff-dec-code-bases dec) table (1+ i))
                      (aref (huff-dec-code-bases dec) table i)))
        (setf (aref (huff-dec-code-bases dec) table i)
              (- base1 (aref (huff-dec-code-bases dec) table i)))
        (setf (aref (huff-dec-code-limits dec) table i) (1- code))
        (setf code (ash code 1)))

      ;; Populate the mapping from canonical code index to output symbol.
      (setf code-index 0)
      (loop for bit-len fixnum from min-len to max-len do
        (dotimes (sym alphabet-size)
          (when (= (aref table-code-lengths table sym) bit-len)
            (setf (aref (huff-dec-code-symbols dec) table code-index) sym)
            (incf code-index))))))

  nil)
