;;;; CL-SDM - Opinionated Extra Batteries for Common Lisp
;;;; Copyright (C) 2021-2025 Remilia Scarlet <remilia@posteo.jp>
;;;; Copyright (C) 2015 Jaime Olivares
;;;; Copyright (C) 2011 Matthew Francis
;;;; Copyright (C) 2003-2008 Yuta Mori
;;;; Ported from the Java implementation by Matthew Francis:
;;;; https://github.com/MateuszBartosiewicz/bzip2.
;;;;
;;;; Ported by Remilia Scarlet from the C# implementation by Jamie Olivares:
;;;; http://github.com/jaime-olivares/bzip2
;;;;
;;;; This program is free software: you can redistribute it and/or modify it
;;;; under the terms of the GNU Affero General Public License as published by
;;;; the Free Software Foundation, either version 3 of the License, or (at your
;;;; option) any later version.
;;;;
;;;; This program is distributed in the hope that it will be useful, but WITHOUT
;;;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;;;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public
;;;; License for more details.
;;;;
;;;; You should have received a copy of the GNU Affero General Public License
;;;; along with this program.  If not, see <https://www.gnu.org/licenses/>.
(in-package :cl-sdm-bzip2)

;;;;
;;;; Burrows-Wheeler Transform/Suffix Sorting
;;;;
;;;; DivSufSort suffix array generator
;;;; Based on libdivsufsort 1.2.3 patched to support BZip2
;;;;
;;;; This is a port of the C# version, which is a port of the Java version,
;;;; which is a simple conversion of the original C with two minor bugfixes
;;;; applied (see "BUGFIX" comments within the code). Documentation within the
;;;; original code was largely absent.
;;;;
;;;; Remi: TODO Clean this code up.  There are a LOT of places with no comments,
;;;; bad variable names, etc.
;;;;

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; Constants
;;;

(defining-consts
  (+stack-size+ 64)
  (+bucket-a-size+ 256)
  (+bucket-b-size+ 65536)
  (+ss-block-size+ 1024)
  (+insertion-sort-threshold+ 8)

  (+log-2-table+
   (new-array-with
    t/int8
    #(-1 0 1 1 2 2 2 2 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
      5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
      6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
      6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
      7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
      7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
      7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
      7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; STACK-ENTRY record
;;;

(defstruct stack-entry
  (a 0 :type t/int32)
  (b 0 :type t/int32)
  (c 0 :type t/int32)
  (d 0 :type t/int32))

(define-typed-fn stack-entry-set ((stack-entry entry) (t/int32 a b c d))
    (null t)
  (declare (optimize speed (debug 1)))
  (setf (stack-entry-a entry) a
        (stack-entry-b entry) b
        (stack-entry-c entry) c
        (stack-entry-d entry) d)
  nil)

(define-typed-fn stack-entry-reset ((stack-entry entry))
    (null t)
  (declare (optimize speed (debug 1)))
  (setf (stack-entry-a entry) 0
        (stack-entry-b entry) 0
        (stack-entry-c entry) 0
        (stack-entry-d entry) 0)
  nil)

(deftype t/stack ()
  '(simple-array stack-entry (*)))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; PARTITION-RESULT record
;;;

(defstruct partition-result
  (first 0 :type t/int32)
  (last 0 :type t/int32))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; TR-BUDGET record
;;;

(defstruct tr-budget
  (budget 0 :type t/int32)
  (chance 0 :type t/int32))

(define-typed-fn tr-budget-update ((tr-budget obj) (fixnum size n))
    (boolean t)
  (declare (optimize speed (debug 1)))
  (decf (tr-budget-budget obj) n)
  (when (<= (tr-budget-budget obj) 0)
    (decf (tr-budget-chance obj))
    (when (zerop (tr-budget-chance obj))
      (return-from tr-budget-update nil))
    (incf (tr-budget-budget obj) size))
  t)

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; DIV-SUF-SORT
;;;

(defstruct (div-suf-sort (:constructor %make-div-suf-sort)
                         (:conc-name dss-))
  (sa    (new-array 0 t/int32) :type t/int32-array)
  (bytes (new-array 0 t/uint8) :type t/uint8-array)
  (n     0                     :type t/int32)
  (stack (new-array-with
          stack-entry
          (loop repeat +stack-size+ collect (make-stack-entry)))
   :type t/stack)
  (ssize 0 :type t/int32)
  (int32-pool (make-array-pool 't/int32 0) :type array-pool))

(defmacro break-unless (&body forms)
  `(unless (progn ,@forms)
     (loop-finish)))

(defmacro break-when (&body forms)
  `(when (progn ,@forms)
     (loop-finish)))

(defmacro dss-bytes-aref-chain (dss td &rest offsets)
  (labels
      ((make-chain (the-dss the-offset more-offsets)
         (list 'aref (list 'dss-sa the-dss)
               (if (null (car more-offsets))
                   ;; At end
                   the-offset
                   ;; Recurse
                   (list '+ the-offset
                         (make-chain the-dss (car more-offsets) (cdr more-offsets)))))))
    `(aref (dss-bytes ,dss)
           (+ ,td ,(make-chain dss (car offsets) (cdr offsets))))))

(define-typed-fn make-stack ((div-suf-sort dss))
    (null t)
  (loop for ent across (dss-stack dss) do (stack-entry-reset ent))
  (setf (dss-ssize dss) 0)
  nil)

(define-typed-fn push-stack ((div-suf-sort dss) (t/int32 a b c d))
    (null t)
  (stack-entry-set (aref (dss-stack dss) (dss-ssize dss)) a b c d)
  (incf (dss-ssize dss))
  nil)

(define-typed-fn pop-stack ((div-suf-sort dss))
    (stack-entry t)
  (decf (dss-ssize dss))
  (aref (dss-stack dss) (dss-ssize dss)))

(define-typed-fn stack-empty-p ((div-suf-sort dss))
    (boolean t)
  (zerop (dss-ssize dss)))

(define-typed-fn bucket-b ((t/int32 c0 c1))
    (t/int32 t)
  (declare (optimize speed (debug 1)))
  (logior (coerce-to-int32 (ash c1 8)) c0))

(define-typed-fn bucket-b-star ((t/int32 c0 c1))
    (t/int32 t)
  (declare (optimize speed (debug 1)))
  (logior (coerce-to-int32 (ash c0 8)) c1))

(define-typed-fn dss-swap ((simple-array arr1) (fixnum idx1)
                           (simple-array arr2) (fixnum idx2))
    (null t)
  "Swaps the element at IDX1 in ARR1 and the element at IDX2 in ARR2."
  ;;  (declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (muffling
    (psetf (aref arr1 idx1) (aref arr2 idx2)
           (aref arr2 idx2) (aref arr1 idx1)))
;;    (let ((tmp (aref arr1 idx1)))
;;      (setf (aref arr1 idx1) (aref arr2 idx2))
;;      (setf (aref arr2 idx2) tmp)))
  nil)

(define-typed-fn dss-ss-compare ((div-suf-sort dss) (t/int32 p1 p2 depth))
    (t/int32)
  ;;  (declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (let ((u1n (+ (aref (dss-sa dss) (1+ p1)) 2))
        (u2n (+ (aref (dss-sa dss) (1+ p2)) 2))
        (u1 (+ depth (aref (dss-sa dss) p1)))
        (u2 (+ depth (aref (dss-sa dss) p2))))
    (declare (type t/int32 u1n u2n u1 u2))

    (loop while (and (< u1 u1n)
                     (< u2 u2n)
                     (= (aref (dss-bytes dss) u1)
                        (aref (dss-bytes dss) u2)))
          do (incf u1)
             (incf u2))

    (cond
      ((< u1 u1n)
       (if (< u2 u2n)
           (- (aref (dss-bytes dss) u1)
              (aref (dss-bytes dss) u2))
           1))
      ((< u2 u2n)
       -1)
      (t 0))))

(define-typed-fn dss-ss-compare-last ((div-suf-sort dss) (t/int32 pa p1 p2 depth size))
    (t/int32)
  ;;  (declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (let ((u1 (+ depth (aref (dss-sa dss) p1)))
        (u2 (+ depth (aref (dss-sa dss) p2)))
        (u1n size)
        (u2n (+ (aref (dss-sa dss) (1+ p2)) 2)))
    (declare (type t/int32 u1n u2n u1 u2))

    (loop while (and (< u1 u1n)
                     (< u2 u2n)
                     (= (aref (dss-bytes dss) u1)
                        (aref (dss-bytes dss) u2)))
          do (incf u1)
             (incf u2))

    (when (< u1 u1n)
      (if (< u2 u2n)
          (return-from dss-ss-compare-last (- (aref (dss-bytes dss) u1)
                                              (aref (dss-bytes dss) u2)))
          (return-from dss-ss-compare-last 1)))

    (when (= u2 u2n)
      (return-from dss-ss-compare-last 1))

    (setf u1 (mod u1 size))
    (setf u1n (+ (aref (dss-sa dss) pa) 2))
    (loop while (and (< u1 u1n)
                     (< u2 u2n)
                     (= (aref (dss-bytes dss) u1)
                        (aref (dss-bytes dss) u2)))
          do (incf u1)
             (incf u2))

    (cond
      ((< u1 u1n)
       (if (< u2 u2n)
           (- (aref (dss-bytes dss) u1)
              (aref (dss-bytes dss) u2))
           1))
      ((< u2 u2n)
       -1)
      (t 0))))

(define-typed-fn dss-ss-insertion-sort ((div-suf-sort dss) (t/int32 pa first last depth))
    (null)
  ;;  (declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (loop with i fixnum = (- last 2)
        with j fixnum = 0
        with x fixnum = 0
        with r fixnum = 0
        while (<= first i)
        do (setf j (1+ i))
           (setf x (aref (dss-sa dss) i))

           (loop while (< 0 (setf r (dss-ss-compare dss (+ pa x)
                                                    (+ pa (aref (dss-sa dss) j))
                                                    depth)))
                 do (loop do
                      (setf (aref (dss-sa dss) (1- j))
                            (aref (dss-sa dss) j))
                      (break-unless
                        (and (< (incf j) last)
                             (minusp (aref (dss-sa dss) j)))))
                    (break-when (<= last j)))

           (when (zerop r)
             (setf (aref (dss-sa dss) j) (lognot-i32 (aref (dss-sa dss) j))))
           (setf (aref (dss-sa dss) (1- j)) x)
           (decf i))
  nil)

(define-typed-fn dss-ss-fixdown ((div-suf-sort dss) (t/int32 td pa sa the-i size))
    (null)
  ;;  (declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (loop with i fixnum = the-i
        with v fixnum = (aref (dss-sa dss) (+ sa i))
        with c fixnum = (aref (dss-bytes dss)
                              (+ td (aref (dss-sa dss)
                                          (+ pa v))))
        with j fixnum = 0
        with k fixnum = 0
        with d fixnum = 0
        with e fixnum = 0
        while (< (setf j (1+ (* 2 i))) size)
        do (setf d (dss-bytes-aref-chain dss td pa (+ sa (setf k j))))
           (incf j)
           (when (< d (setf e (dss-bytes-aref-chain dss td pa (+ sa j))))
             (setf k j)
             (setf d e))
           (break-when (<= d c))

           (setf (aref (dss-sa dss) (+ sa i))
                 (aref (dss-sa dss) (+ sa k)))
           (setf i k)
        finally
           (setf (aref (dss-sa dss) (+ sa i)) v))
  nil)

(define-typed-fn dss-ss-heap-sort ((div-suf-sort dss) (t/int32 td pa sa size))
    (null)
  ;;  (declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (let ((m size))
    (declare (type t/int32 m))
    (when (zerop (mod size 2))
      (decf m)
      (when (< (dss-bytes-aref-chain dss td pa (+ sa (truncate m 2)))
               (dss-bytes-aref-chain dss td pa (+ sa m)))
        (dss-swap (dss-sa dss) (+ sa m)
                  (dss-sa dss) (+ sa (truncate m 2)))))

    (loop for i fixnum from (1- (truncate m 2)) downto 0 do
      (dss-ss-fixdown dss td pa sa i m))

    (when (zerop (mod size 2))
      (dss-swap (dss-sa dss) sa (dss-sa dss) (+ sa m))
      (dss-ss-fixdown dss td pa sa 0 m))

    (loop for i from (1- m) downto 1
          for x fixnum = (aref (dss-sa dss) sa)
          do (setf (aref (dss-sa dss) sa)
                   (aref (dss-sa dss) (+ sa i)))
             (dss-ss-fixdown dss td pa sa 0 i)
             (setf (aref (dss-sa dss) (+ sa i)) x)))
  nil)

(define-typed-fn dss-ss-median-3 ((div-suf-sort dss) (t/int32 td pa the-v1 the-v2 v3))
    (t/int32)
  ;;  (declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (let* ((v1 the-v1)
         (v2 the-v2)
         (tv1 (dss-bytes-aref-chain dss td pa v1))
         (tv2 (dss-bytes-aref-chain dss td pa v2))
         (tv3 (dss-bytes-aref-chain dss td pa v3)))
    (when (> tv1 tv2)
      (psetf v1 v2
             v2 v1
             tv1 tv2
             tv2 tv1))

    (if (> tv2 tv3)
        (if (> tv1 tv3)
            v1
            v3)
        v2)))

(define-typed-fn dss-ss-median-5 ((div-suf-sort dss) (t/int32 td pa the-v1 the-v2 the-v3 the-v4 the-v5))
    (t/int32)
  ;;  (declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (let* ((v1 the-v1)
         (v2 the-v2)
         (v3 the-v3)
         (v4 the-v4)
         (v5 the-v5)
         (tv1 (dss-bytes-aref-chain dss td pa v1))
         (tv2 (dss-bytes-aref-chain dss td pa v2))
         (tv3 (dss-bytes-aref-chain dss td pa v3))
         (tv4 (dss-bytes-aref-chain dss td pa v4))
         (tv5 (dss-bytes-aref-chain dss td pa v5)))
    (when (> tv2 tv3)
      (psetf v2 v3
             v3 v2
             tv2 tv3
             tv3 tv2))

    (when (> tv4 tv5)
      (psetf v4 v5
             v5 v4
             tv4 tv5
             tv5 tv4))

    (when (> tv2 tv4)
      (psetf v4 v2
             tv2 tv4
             tv4 tv2
             v3 v5
             v5 v3
             tv3 tv5
             tv5 tv3))

    (when (> tv1 tv3)
      (psetf v1 v3
             v3 v1
             tv1 tv3
             tv3 tv1))

    (when (> tv1 tv4)
      (psetf v4 v1
             tv1 tv4
             tv4 tv1
             v3 v5
             tv3 tv5
             tv5 tv3))

    (if (> tv3 tv4)
        v4
        v3)))

(define-typed-fn dss-ss-pivot ((div-suf-sort dss) (t/int32 td pa first last))
    (t/int32)
  ;;  (declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (let* ((x (- last first))
         (middle (+ first (truncate x))))
    (declare (type t/int32 x middle))

    (when (<= x 512)
      (when (<= x 32)
        (return-from dss-ss-pivot (dss-ss-median-3 dss td pa first middle (1- last))))

      (setf x (ash x -2))
      (return-from dss-ss-pivot (dss-ss-median-5 dss td pa first (+ first x) middle (- last 1 x) (1- last))))

    (setf x (ash x -3))
    (dss-ss-median-3 dss
                     td
                     pa
                     (dss-ss-median-3 dss td pa first                (+ first x)  (+ first (ash x 1)))
                     (dss-ss-median-3 dss td pa (- middle x)         middle       (+ middle x))
                     (dss-ss-median-3 dss td pa (- last 1 (ash x 1)) (- last 1 x) (1- last)))))

(define-typed-fn dss-ss-log ((t/int32 x))
    (t/int32 t)
  (declare (optimize speed (debug 1)))
  (if (flag? x #xFF00)
      (+ 8 (aref +log-2-table+ (logand (ash x -8) #xFF)))
      (aref +log-2-table+ (logand x #xFF))))

(define-typed-fn dss-ss-substring-partition ((div-suf-sort dss) (t/int32 pa first last depth))
    (t/int32)
  ;;(declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (loop with a fixnum = (1- first)
        with b fixnum = last
        with x fixnum = 0
        do (loop while (and (< (incf a) b)
                            (>= (+ (aref (dss-sa dss)
                                         (+ pa (aref (dss-sa dss) a)))
                                   depth)
                                (+ (aref (dss-sa dss)
                                         (+ pa (aref (dss-sa dss) a) 1))
                                   1)))
                 do (setf (aref (dss-sa dss) a) (lognot-i32 (aref (dss-sa dss) a))))

           (loop while (and (< a (decf b))
                            (< (+ (aref (dss-sa dss)
                                        (+ pa (aref (dss-sa dss) b)))
                                  depth)
                               (+ (aref (dss-sa dss)
                                        (+ pa (aref (dss-sa dss) b) 1))
                                  1))))

           (break-when (<= b a))
           (setf x (lognot-i32 (aref (dss-sa dss) b)))
           (setf (aref (dss-sa dss) b)
                 (aref (dss-sa dss) a))
           (setf (aref (dss-sa dss) a) x)

        finally
           (progn
             (when (< first a)
               (setf (aref (dss-sa dss) first) (lognot-i32 (aref (dss-sa dss) first))))
             (return a))))

(define-typed-fn dss-ss-multi-key-intro-sort ((div-suf-sort dss) (t/int32 pa the-first the-last the-depth))
    (null)
  ;;(declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (make-stack dss)
  (let* ((first the-first)
         (last the-last)
         (depth the-depth)
         (x 0)  (td 0)
         (a 0)  (v 0)
         (b 0)  (c 0)
         (d 0)  (s 0)
         (tt 0) (e 0)
         (f 0)
         (limit (dss-ss-log (- last first))))
    (declare (type t/int32 first last depth x td a v b c d s tt e f limit))

    (loop do
      (tagbody
         (when (<= (- last first) +insertion-sort-threshold+)
           (when (< 1 (- last first))
             (dss-ss-insertion-sort dss pa first last depth))
           (when (stack-empty-p dss)
             (return-from dss-ss-multi-key-intro-sort))
           (let ((entry (pop-stack dss)))
             (setf first (stack-entry-a entry))
             (setf last  (stack-entry-b entry))
             (setf depth (stack-entry-c entry))
             (setf limit (stack-entry-d entry))
             (go ==next==)))

         (setf td depth)
         (when (zerop limit)
           (dss-ss-heap-sort dss td pa first (- last first)))
         (decf limit)

         (when (minusp limit)
           (setf a (1+ first))
           (setf v (dss-bytes-aref-chain dss td pa first))

           (loop while (< a last) do
             (when (/= (setf x (dss-bytes-aref-chain dss td pa a)) v)
               (break-when (< 1 (- a first)))
               (setf v x)
               (setf first a))
             (incf a))

           (when (< (aref (dss-bytes dss)
                          (1- (+ td (aref (dss-sa dss)
                                          (+ pa (aref (dss-sa dss)
                                                      first))))))
                    v)
             (setf first (dss-ss-substring-partition dss pa first a depth)))

           (if (<= (- a first)
                   (- last a))
               (cond
                 ((< 1 (- a first))
                  (push-stack dss a last depth -1)
                  (setf last a)
                  (incf depth)
                  (setf limit (dss-ss-log (- a first))))
                 (t
                  (setf first a)
                  (setf limit -1)))
               (cond
                 ((< 1 (- last a))
                  (push-stack dss first a (1+ depth) (dss-ss-log (- a first)))
                  (setf first a)
                  (setf limit -1))
                 (t
                  (setf last a)
                  (incf depth)
                  (setf limit (dss-ss-log (- a first))))))

           (go ==next==)) ;; when (minusp limit)

         (setf a (dss-ss-pivot dss td pa first last))
         (setf v (dss-bytes-aref-chain dss td pa a))
         (dss-swap (dss-sa dss) first (dss-sa dss) a)

         (setf b first)
         (loop while (and (< (incf b) last)
                          (= (setf x (dss-bytes-aref-chain dss td pa b)) v)))

         (when (and (< (setf a b) last)
                    (< x v))
           (loop while (and (< (incf b) last)
                            (<= (setf x (dss-bytes-aref-chain dss td pa b)) v))
                 do (when (= x v)
                      (dss-swap (dss-sa dss) b (dss-sa dss) a)
                      (incf a))))

         (setf c last)
         (loop while (and (< b (decf c))
                          (= (setf x (dss-bytes-aref-chain dss td pa c)) v)))

         (when (and (< b (setf d c))
                    (> x v))
           (loop while (and (< b (decf c))
                            (>= (setf x (dss-bytes-aref-chain dss td pa c)) v))
                 do (when (= x v)
                      (dss-swap (dss-sa dss) c (dss-sa dss) d)
                      (decf d))))

         (loop while (< b c) do
           (dss-swap (dss-sa dss) b (dss-sa dss) c)

           (loop while (and (< (incf b) c)
                            (<= (setf x (dss-bytes-aref-chain dss td pa b)) v))
                 do (when (= x v)
                      (dss-swap (dss-sa dss) b (dss-sa dss) a)
                      (incf a)))

           (loop while (and (< b (decf c))
                            (>= (setf x (dss-bytes-aref-chain dss td pa c)) v))
                 do (when (= x v)
                      (dss-swap (dss-sa dss) c (dss-sa dss) d)
                      (decf d))))

         (cond
           ((<= a d)
            (setf c (1- b))

            (when (> (setf s (- a first))
                     (setf tt (- b a)))
              (setf s tt))

            (setf e first)
            (setf f (- b s))
            (loop while (< 0 s) do
              (dss-swap (dss-sa dss) e (dss-sa dss) f)
              (decf s)
              (incf e)
              (incf f))

            (when (> (setf s (- d c))
                     (setf tt (- last d 1)))
              (setf s tt))

            (setf e b)
            (setf f (- last s))
            (loop while (< 0 s) do
              (dss-swap (dss-sa dss) e (dss-sa dss) f)
              (decf s)
              (incf e)
              (incf f))

            (setf a (+ first (- b a)))
            (setf c (- last (- d c)))
            (setf b (if (<= v (aref (dss-bytes dss)
                                    (1- (+ td (aref (dss-sa dss)
                                                    (+ pa (aref (dss-sa dss)
                                                                a)))))))
                        a
                        (dss-ss-substring-partition dss pa a c depth)))

            (if (<= (- a first)
                    (- last c))
                (cond
                  ((<= (- last c)
                       (- c b))
                   (push-stack dss b c (1+ depth) (dss-ss-log (- c b)))
                   (push-stack dss c last depth limit)
                   (setf last a))

                  ((<= (- a first)
                       (- c b))
                   (push-stack dss c last depth limit)
                   (push-stack dss b c (1+ depth) (dss-ss-log (- c b)))
                   (setf last a))

                  (t
                   (push-stack dss c last depth limit)
                   (push-stack dss first a depth limit)
                   (setf first b)
                   (setf last c)
                   (incf depth)
                   (setf limit (dss-ss-log (- c b)))))

                ;; else
                (cond
                  ((<= (- a first)
                       (- c b))
                   (push-stack dss b c (1+ depth) (dss-ss-log (- c b)))
                   (push-stack dss first a depth limit)
                   (setf first c))

                  ((<= (- last c)
                       (- c b))
                   (push-stack dss first a depth limit)
                   (push-stack dss b c (1+ depth) (dss-ss-log (- c b)))
                   (setf first c))

                  (t
                   (push-stack dss first a depth limit)
                   (push-stack dss c last depth limit)
                   (setf first b)
                   (setf last c)
                   (incf depth)
                   (setf limit (dss-ss-log (- c b)))))))

           (t
            (incf limit)
            (when (< (aref (dss-bytes dss)
                           (1- (+ td (aref (dss-sa dss)
                                           (+ pa (aref (dss-sa dss)
                                                       first))))))
                     v)
              (setf first (dss-ss-substring-partition dss pa first last depth))
              (setf limit (dss-ss-log (- last first))))
            (incf depth)))
       ==next==)))
  nil)

(define-typed-fn dss-ss-block-swap ((simple-array arr1) (t/int32 first1)
                                    (simple-array arr2) (t/int32 first2)
                                    (t/int32 size))
    (null)
  ;;(declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (loop with a fixnum = first1
        with b fixnum = first2
        with i fixnum = size
        while (< 0 i) do
          (dss-swap arr1 a arr2 b)
          (decf i)
          (incf a)
          (incf b))
  nil)

(define-typed-fn dss-ss-merge-forward ((div-suf-sort dss) (t/int32 pa) (t/int32-array buf) (t/int32 buf-offset)
                                       (t/int32 first middle last depth))
    (null)
  ;;(declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (let ((buf-end 0)
        (x 0) (i 0)
        (j 0) (k 0))
    (declare (type t/int32 buf-end x i j k))

    (setf buf-end (1- (+ buf-offset (- middle last))))
    (dss-ss-block-swap buf buf-offset (dss-sa dss) first (- middle first))
    (setf x (aref (dss-sa dss) first))
    (setf i first)
    (setf j buf-offset)
    (setf k middle)

    (loop for r fixnum = (dss-ss-compare dss
                                         (+ pa (aref buf j))
                                         (+ pa (aref (dss-sa dss) k))
                                         depth)
          do (cond
               ((minusp r)
                (loop do
                  (setf (aref (dss-sa dss) i) (aref buf j))
                  (incf i)
                  (when (<= buf-end j)
                    (setf (aref buf j) x)
                    (return-from dss-ss-merge-forward))
                  (setf (aref buf j) (aref (dss-sa dss) i))
                  (incf j)

                  (break-unless (minusp (aref buf j)))))

               ((plusp r)
                (loop do
                  (setf (aref (dss-sa dss) i) (aref (dss-sa dss) k))
                  (incf i)
                  (setf (aref (dss-sa dss) k) (aref (dss-sa dss) i))
                  (incf k)

                  (when (<= last k)
                    (loop while (< j buf-end) do
                      (setf (aref (dss-sa dss) i) (aref buf j))
                      (incf i)
                      (setf (aref buf j) (aref (dss-sa dss) i))
                      (incf j))

                    (setf (aref (dss-sa dss) i) (aref buf j))
                    (setf (aref buf j) x)
                    (return-from dss-ss-merge-forward))

                  (break-unless (minusp (aref (dss-sa dss) k)))))

               (t
                (setf (aref (dss-sa dss) k) (lognot-i32 (aref (dss-sa dss) k)))
                (loop do
                  (setf (aref (dss-sa dss) i) (aref buf j))
                  (incf i)
                  (when (<= buf-end j)
                    (setf (aref buf j) x)
                    (return-from dss-ss-merge-forward))

                  (setf (aref buf j) (aref (dss-sa dss) i))
                  (incf j)

                  (break-unless (minusp (aref buf j))))

                (loop do
                  (setf (aref (dss-sa dss) i) (aref (dss-sa dss) k))
                  (incf i)
                  (setf (aref (dss-sa dss) k) (aref (dss-sa dss) i))
                  (incf k)

                  (when (<= last k)
                    (loop while (< j buf-end) do
                      (setf (aref (dss-sa dss) i) (aref buf j))
                      (incf i)
                      (setf (aref buf j) (aref (dss-sa dss) i))
                      (incf j))

                    (setf (aref (dss-sa dss) i) (aref buf j))
                    (setf (aref buf j) x)
                    (return-from dss-ss-merge-forward))

                  (break-unless (minusp (aref (dss-sa dss) k))))))))
  nil)

(define-typed-fn dss-ss-merge-backward ((div-suf-sort dss) (t/int32 pa) (t/int32-array buf) (t/int32 buf-offset)
                                        (t/int32 first middle last depth))
    (null)
  ;;(declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (let ((buf-end 0)
        (p1 0)
        (p2 0)
        (x 0)
        (tt 0)
        (i 0)
        (j 0)
        (k 0))
    (declare (type t/int32 buf-end p1 p2 x tt i j k))
    (setf buf-end (+ buf-offset (- last middle)))
    (dss-ss-block-swap buf buf-offset (dss-sa dss) middle (- last middle))

    (cond
      ((minusp (aref buf (1- buf-end)))
       (logiorf x 1)
       (setf p1 (+ pa (lognot-i32 (aref buf (1- buf-end))))))
      (t
       (setf p1 (+ pa (aref buf (1- buf-end))))))

    (cond
      ((minusp (aref (dss-sa dss) (1- middle)))
       (logiorf x 2)
       (setf p2 (+ pa (lognot-i32 (aref (dss-sa dss) (1- middle))))))
      (t
       (setf p2 (+ pa (aref (dss-sa dss) (1- middle))))))

    (setf tt (aref (dss-sa dss) (1- last)))
    (setf i (1- last))
    (setf j (1- buf-end))
    (setf k (1- middle))

    (loop for r fixnum = (dss-ss-compare dss p1 p2 depth) do
      (cond
        ((plusp r)
         (when (flag? x 1)
           (loop do
             (setf (aref (dss-sa dss) i) (aref buf j))
             (decf i)
             (setf (aref buf j) (aref (dss-sa dss) i))
             (decf j)
             (break-unless (minusp (aref buf j))))
           (logxorf x 1))

         (setf (aref (dss-sa dss) i) (aref buf j))
         (decf i)
         (when (<= j buf-offset)
           (setf (aref buf j) tt)
           (return-from dss-ss-merge-backward))

         (setf (aref buf j) (aref (dss-sa dss) i))
         (decf j)

         (cond
           ((minusp (aref buf j))
            (logiorf x 1)
            (setf p1 (+ pa (lognot-i32 (aref buf j)))))
           (t
            (setf p1 (+ pa (aref buf j))))))

        ((minusp r)
         (when (flag? x 2)
           (loop do
             (setf (aref (dss-sa dss) i) (aref (dss-sa dss) k))
             (decf i)
             (setf (aref (dss-sa dss) k) (aref (dss-sa dss) i))
             (decf k)
             (break-unless (minusp (aref (dss-sa dss) k))))
           (logxorf x 2))

         (setf (aref (dss-sa dss) i) (aref (dss-sa dss) k))
         (decf i)
         (setf (aref (dss-sa dss) k) (aref (dss-sa dss) i))
         (decf k)

         (when (< k first)
           (loop while (< buf-offset j) do
             (setf (aref (dss-sa dss) i) (aref buf j))
             (decf i)
             (setf (aref buf j) (aref (dss-sa dss) i))
             (decf j))

           (setf (aref (dss-sa dss) i) (aref buf j))
           (setf (aref buf j) tt)
           (return-from dss-ss-merge-backward))

         (cond
           ((minusp (aref (dss-sa dss) k))
            (logiorf x 2)
            (setf p2 (+ pa (lognot-i32 (aref (dss-sa dss) k)))))
           (t
            (setf p2 (+ pa (aref (dss-sa dss) k))))))

        (t
         (when (flag? x 1)
           (loop do
             (setf (aref (dss-sa dss) i) (aref buf j))
             (decf i)
             (setf (aref buf j) (aref (dss-sa dss) i))
             (decf j)
             (break-unless (minusp (aref buf j))))
           (logxorf x 1))

         (setf (aref (dss-sa dss) i) (lognot-i32 (aref buf j)))
         (decf i)
         (when (<= j buf-offset)
           (setf (aref buf j) tt)
           (return-from dss-ss-merge-backward))

         (setf (aref buf j) (aref (dss-sa dss) i))
         (decf j)

         (when (flag? x 2)
           (loop do
             (setf (aref (dss-sa dss) i) (aref (dss-sa dss) k))
             (decf i)
             (setf (aref (dss-sa dss) k) (aref (dss-sa dss) i))
             (decf k)
             (break-unless (minusp (aref (dss-sa dss) k))))
           (logxorf x 2))

         (setf (aref (dss-sa dss) i) (aref (dss-sa dss) k))
         (decf i)
         (setf (aref (dss-sa dss) k) (aref (dss-sa dss) i))
         (decf k)

         (when (< k first)
           (loop while (< buf-offset j) do
             (setf (aref (dss-sa dss) i) (aref buf j))
             (decf i)
             (setf (aref buf j) (aref (dss-sa dss) i))
             (decf j))

           (setf (aref (dss-sa dss) i) (aref buf j))
           (setf (aref buf j) tt)
           (return-from dss-ss-merge-backward))

         (cond
           ((minusp (aref buf j))
            (logiorf x 1)
            (setf p1 (+ pa (lognot-i32 (aref buf j)))))
           (t
            (setf p1 (+ pa (aref buf j)))))

         (cond
           ((minusp (aref (dss-sa dss) k))
            (logiorf x 2)
            (setf p2 (+ pa (lognot-i32 (aref (dss-sa dss) k)))))
           (t
            (setf p2 (+ pa (aref (dss-sa dss) k)))))))))
  nil)

(define-typed-fn dss-ss-merge ((div-suf-sort dss) (t/int32 pa the-first the-middle the-last)
                               (t/int32-array buf) (t/int32 buf-offset buf-size depth))
    (null)
  ;;(declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (labels
      ((get-idx (%a)
         (declare (type t/int32 %a)
                  (optimize speed (debug 0) (safety 0) (compilation-speed 0)))
         (if (<= 0 %a)
             %a
             (lognot-i32 %a)))

       (check-eq (%pa %depth %a)
         (declare (type t/int32 %pa %depth %a)
                  (optimize speed (compilation-speed 0)))
         (when (and (<= 0 (aref (dss-sa dss) %a))
                    (zerop (dss-ss-compare dss
                                           (+ %pa (get-idx (aref (dss-sa dss) (1- %a))))
                                           (+ %pa (aref (dss-sa dss) %a))
                                           %depth)))
           (setf (aref (dss-sa dss) %a) (lognot-i32 (aref (dss-sa dss) %a))))))
    (declare (inline get-idx check-eq))

    (make-stack dss)
    (let* ((first the-first)
           (middle the-middle)
           (last the-last)
           (check 0)
           (m 0)
           (len 0)
           (half 0)
           (j 0)
           (i 0)
           (nxt 0))
      (declare (type t/int32 check m len half j i nxt))

      (loop do
        (tagbody
           (when (<= (- last middle) buf-size)
             (when (and (< first middle)
                        (< middle last))
               (dss-ss-merge-backward dss pa buf buf-offset first middle last depth))

             (when (flag? check 1) (check-eq pa depth first))
             (when (flag? check 2) (check-eq pa depth last))

             (when (stack-empty-p dss)
               (return-from dss-ss-merge))
             (let ((entry (pop-stack dss)))
               (setf first (stack-entry-a entry))
               (setf middle (stack-entry-b entry))
               (setf last (stack-entry-c entry))
               (setf check (stack-entry-d entry))
               (go ==next==)))

           (when (<= (- middle first) buf-size)
             (when (< first middle)
               (dss-ss-merge-forward dss pa buf buf-offset first middle last depth))

             (when (flag? check 1) (check-eq pa depth first))
             (when (flag? check 2) (check-eq pa depth last))

             (when (stack-empty-p dss)
               (return-from dss-ss-merge))
             (let ((entry (pop-stack dss)))
               (setf first (stack-entry-a entry))
               (setf middle (stack-entry-b entry))
               (setf last (stack-entry-c entry))
               (setf check (stack-entry-d entry))
               (go ==next==)))

           (setf m 0)
           (setf len (min (- middle first) (- last middle)))
           (setf half (ash len -1))

           (loop while (< 0 len) do
             (when (minusp
                    (dss-ss-compare dss
                                    (+ pa (get-idx (aref (dss-sa dss) (+ middle m half))))
                                    (+ pa (get-idx (aref (dss-sa dss) (- middle m half 1))))
                                    depth))
               (incf m (1+ half))
               (decf half (logxor (logand len 1) 1)))

             (setf len half)
             (setf half (ash half -1)))

           (cond
             ((< 0 m)
              (dss-ss-block-swap (dss-sa dss) (- middle m) (dss-sa dss) middle m)
              (setf j middle)
              (setf i middle)
              (setf nxt 0)

              (when (< (+ middle m) last)
                (when (minusp (aref (dss-sa dss) (+ middle m)))
                  (loop while (minusp (aref (dss-sa dss) (1- i))) do
                    (decf i))
                  (setf (aref (dss-sa dss) (+ middle m))
                        (lognot-i32 (aref (dss-sa dss) (+ middle m)))))

                (setf j middle)
                (loop while (minusp (aref (dss-sa dss) j)) do
                  (incf j))
                (setf nxt 1))

              (cond
                ((<= (- i first) (- last j))
                 (push-stack dss j (+ middle m) last (logior (logand check 2) (logand nxt 1)))
                 (decf middle m)
                 (setf last i)
                 (logandf check 1))
                (t
                 (when (and (= i middle) (= middle j))
                   (setf nxt (coerce-to-int32 (ash nxt 1))))
                 (push-stack dss first (- middle m) i (logior (logand check 1) (logand nxt 2)))
                 (setf first j)
                 (incf middle m)
                 (setf check (logior (logand check 2) (logand nxt 1))))))

             (t
              (when (flag? check 1) (check-eq pa depth first))
              (check-eq pa depth middle)
              (when (flag? check 2) (check-eq pa depth last))

              (when (stack-empty-p dss)
                (return-from dss-ss-merge))
              (let ((entry (pop-stack dss)))
                (setf first (stack-entry-a entry))
                (setf middle (stack-entry-b entry))
                (setf last (stack-entry-c entry))
                (setf check (stack-entry-d entry)))))
         ==next==))))
  nil)

(define-typed-fn dss-sub-string-sort ((div-suf-sort dss) (t/int32 pa the-first last)
                                      (t/int32-array buf) (t/int32 buf-offset buf-size depth)
                                      (boolean last-suffix?) (t/int32 size))
    (null)
  ;;(declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (let ((first the-first)
        (a 0)
        (i 0)
        (k 0)
        (cur-buf-offset 0)
        (cur-buf-size 0)
        (b 0)
        (j 0))
    (declare (type t/int32 a i k cur-buf-offset cur-buf-size b j))

    (when last-suffix?
      (incf first))

    (setf a first)
    (loop while (< (+ a +ss-block-size+) last) do
      (dss-ss-multi-key-intro-sort dss pa a (+ a +ss-block-size+) depth)
      (let ((cur-buf (dss-sa dss)))
        (setf cur-buf-offset (+ a +ss-block-size+))
        (setf cur-buf-size (- last (+ a +ss-block-size+)))
        (when (<= cur-buf-size buf-size)
          (setf cur-buf-size buf-size)
          (setf cur-buf buf)
          (setf cur-buf-offset buf-offset))

        (setf b a)
        (setf k +ss-block-size+)
        (setf j i)

        (loop while (flag? j 1) do
          (dss-ss-merge dss pa (- b k) b (+ b k) cur-buf cur-buf-offset cur-buf-size depth)
          (decf b k)
          (setf k (coerce-to-int32 (ash k 1)))
          (setf j (ash j -1)))

        (incf a +ss-block-size+)
        (incf i)))

    (dss-ss-multi-key-intro-sort dss pa a last depth)

    (setf k +ss-block-size+)
    (loop until (zerop i) do
      (when (flag? i 1)
        (dss-ss-merge dss pa (- a k) a last buf buf-offset buf-size depth)
        (decf a k))
      (setf k (coerce-to-int32 (ash k 1)))
      (setf i (ash i -1)))

    (when last-suffix?
      (setf a first)
      (setf i (aref (dss-sa dss) (1- first)))
      (loop with r fixnum = 1
            while (and (< a last)
                       (or (minusp (aref (dss-sa dss) a))
                           (< 0 (setf r (dss-ss-compare-last dss
                                                             pa
                                                             (+ pa i)
                                                             (+ pa (aref (dss-sa dss) a))
                                                             depth
                                                             size)))))
            do (setf (aref (dss-sa dss) (1- a)) (aref (dss-sa dss) a))
               (incf a)
            finally
               (when (zerop r)
                 (setf (aref (dss-sa dss) a) (lognot-i32 (aref (dss-sa dss) a))))
               (setf (aref (dss-sa dss) (1- a)) i))))
  nil)

(define-typed-fn dss-tr-get-c ((div-suf-sort dss) (t/int32 isa isad isan x))
    (t/int32 t)
  (declare (optimize speed (debug 1)))
  (if (< (+ isad x) isan)
      (aref (dss-sa dss) (+ isad x))
      (aref (dss-sa dss) (+ isa
                            (mod (+ (- isad isa) x)
                                 (- isan isa))))))

(define-typed-fn dss-tr-fixdown ((div-suf-sort dss) (t/int32 isa isad isan sa the-i size))
    (null)
  ;;(declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (let* ((i the-i)
         (j 0)
         (k 0)
         (v (aref (dss-sa dss) (+ sa i)))
         (c (dss-tr-get-c dss isa isad isan v))
         (d 0)
         (e 0))
    (declare (type t/int32 i j k v c d e))

    (loop while (< (setf j (1+ (* 2 i))) size) do
      (setf k j)
      (incf j)
      (setf d (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) (+ sa k))))
      (when (< d (setf e (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) (+ sa j)))))
        (setf k j)
        (setf d e))

      (break-when (<= d c))
      (setf (aref (dss-sa dss) (+ sa i))
            (aref (dss-sa dss) (+ sa k)))
      (setf i k))

    (setf (aref (dss-sa dss) (+ sa i)) v))
  nil)

(define-typed-fn dss-tr-heap-sort ((div-suf-sort dss) (t/int32 isa isad isan sa size))
    (null)
  ;;(declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (let ((m size))
    (declare (type t/int32 m))
    (when (zerop (mod size 2))
      (decf m 1)
      (when (< (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) (+ sa (truncate m 2))))
               (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) (+ sa m))))
        (dss-swap (dss-sa dss) (+ sa m)
                  (dss-sa dss) (+ sa (truncate m 2)))))

    (loop for i fixnum from (1- (truncate m 2)) downto 0 do
      (dss-tr-fixdown dss isa isad isan sa i m))

    (when (zerop (mod size 2))
      (dss-swap (dss-sa dss) sa (dss-sa dss) (+ sa m))
      (dss-tr-fixdown dss isa isad isan sa 0 m))

    (loop for i fixnum from (1- m) downto 1
          for tt fixnum = (aref (dss-sa dss) sa)
          do (setf (aref (dss-sa dss) sa) (aref (dss-sa dss) (+ sa i)))
             (dss-tr-fixdown dss isa isad isan sa 0 i)
             (setf (aref (dss-sa dss) (+ sa i)) tt)))
  nil)

(define-typed-fn dss-tr-insertion-sort ((div-suf-sort dss) (t/int32 isa isad isan first last))
    (null)
  ;;(declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (let ((a (1+ first))
        (b 0)
        (tt 0)
        (r 0))
    (declare (type t/int32 a b tt r))

    (loop while (< a last) do
      (setf tt (aref (dss-sa dss) a))
      (setf b (1- a))
      (setf r 0)
      (loop while (> 0 (setf r (- (dss-tr-get-c dss isa isad isan tt)
                                  (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) b)))))
            do
               (loop do
                 (setf (aref (dss-sa dss) (1+ b))
                       (aref (dss-sa dss) b))
                 (break-unless (and (<= first (decf b))
                                    (minusp (aref (dss-sa dss) b)))))
               (break-when (< b first)))

      (when (zerop r)
        (setf (aref (dss-sa dss) b) (lognot-i32 (aref (dss-sa dss) b))))
      (setf (aref (dss-sa dss) (1+ b)) tt)
      (incf a)))
  nil)

(define-typed-fn dss-tr-log ((t/int32 x))
    (t/int32 t)
  (declare (optimize speed (debug 1)))
  (if (flag? x #xFFFF0000)
      (if (flag? x #xFF000000)
          (+ 24 (aref +log-2-table+ (logand (ash x -24) #xFF)))
          (+ 16 (aref +log-2-table+ (logand (ash x -16) #xFF))))
      (if (flag? x #x0000FF00)
          (+ 8 (aref +log-2-table+ (logand (ash x -8) #xFF)))
          (aref +log-2-table+ (logand x #xFF)))))

(define-typed-fn dss-tr-median-3 ((div-suf-sort dss) (t/int32 isa isad isan the-v1 the-v2 the-v3))
    (t/int32 :no)
  ;;(declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (let* ((v1 the-v1)
         (v2 the-v2)
         (v3 the-v3)
         (sav1 (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) v1)))
         (sav2 (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) v2)))
         (sav3 (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) v3))))
    (declare (type t/int32 v1 v2 v3 sav1 sav2 sav3))

    (when (> sav1 sav2)
      (psetf v1 v2
             v2 v1
             sav1 sav2
             sav2 sav1))

    (if (> sav2 sav3)
        (if (> sav1 sav3)
            v1
            v3)
        v2)))

(define-typed-fn dss-tr-median-5 ((div-suf-sort dss) (t/int32 isa isad isan the-v1 the-v2 the-v3 the-v4 the-v5))
    (t/int32 :no)
  ;;(declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (let* ((v1 the-v1)
         (v2 the-v2)
         (v3 the-v3)
         (v4 the-v4)
         (v5 the-v5)
         (sav1 (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) v1)))
         (sav2 (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) v2)))
         (sav3 (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) v3)))
         (sav4 (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) v4)))
         (sav5 (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) v5))))
    (declare (type t/int32 v1 v2 v3 v4 v5 sav1 sav2 sav3 sav4 sav5))

    (when (> sav2 sav3)
      (psetf v2 v3
             v3 v2
             sav2 sav3
             sav3 sav2))

    (when (> sav4 sav5)
      (psetf v4 v5
             v5 v4
             sav4 sav5
             sav5 sav4))

    (when (> sav2 sav4)
      (psetf v4 v2
             sav2 sav4
             sav4 sav2
             v3 v5
             v5 v3
             sav3 sav5
             sav5 sav3))

    (when (> sav1 sav3)
      (psetf v1 v3
             v3 v1
             sav1 sav3
             sav3 sav1))

    (when (> sav1 sav4)
      (psetf v4 v1
             sav1 sav4
             sav4 sav1
             v3 v5
             sav3 sav5
             sav5 sav3))

    (if (> sav3 sav4)
        v4
        v3)))

(define-typed-fn dss-tr-pivot ((div-suf-sort dss) (t/int32 isa isad isan first last))
    (t/int32)
  ;;(declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (let* ((tt (- last first))
         (middle (+ first (truncate tt 2))))
    (declare (type t/int32 tt middle))
    (when (<= tt 512)
      (when (<= tt 32)
        (return-from dss-tr-pivot (dss-tr-median-3 dss isa isad isan first middle (1- last))))
      (setf tt (ash tt -2))
      (return-from dss-tr-pivot
        (dss-tr-median-5 dss isa isad isan first (+ first tt) middle (- last 1 tt) (1- last))))

    (setf tt (ash tt -3))
    (dss-tr-median-3 dss
                     isa
                     isad
                     isan
                     (dss-tr-median-3 dss isa isad isan first                 (+ first tt)  (+ first (ash tt 1)))
                     (dss-tr-median-3 dss isa isad isan (- middle tt)         middle        (+ middle tt))
                     (dss-tr-median-3 dss isa isad isan (- last 1 (ash tt 1)) (- last 1 tt) (1- last)))))

(define-typed-fn dss-ls-update-group ((div-suf-sort dss) (t/int32 isa first last))
    (null)
  ;;(declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (loop with b fixnum = 0
        with tt fixnum = 0
        for a from first below last do
          (setf b 0)
          (when (<= 0 (aref (dss-sa dss) a))
            (setf b a)
            (loop do
              (setf (aref (dss-sa dss) (+ isa (aref (dss-sa dss) a))) a)
              (break-unless (and (< (incf a) last)
                                 (<= 0 (aref (dss-sa dss) a)))))

            (setf (aref (dss-sa dss) b) (- b a))
            (break-when (<= last a)))

          (setf b a)
          (loop do
            (setf (aref (dss-sa dss) a) (lognot-i32 (aref (dss-sa dss) a)))
            (break-unless (minusp (aref (dss-sa dss) (incf a)))))

          (setf tt a)
          (loop do
            (setf (aref (dss-sa dss) (+ isa (aref (dss-sa dss) b))) tt)
            (break-unless (<= (incf b) a))))
  nil)

(define-typed-fn dss-ls-intro-sort ((div-suf-sort dss) (t/int32 isa isad isan the-first the-last))
    (null)
  ;;(declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (make-stack dss)
  (let* ((first the-first)
         (last the-last)
         (x 0)
         (a 0)
         (b 0)
         (v 0)
         (c 0)
         (d 0)
         (s 0)
         (tt 0)
         (e 0)
         (f 0)
         (limit (dss-tr-log (- last first))))
    (declare (type t/int32 x a b v c d s tt e f limit first last))

    (loop do
      (tagbody
         (when (<= (- last first) +insertion-sort-threshold+)
           (cond
             ((< 1 (- last first))
              (dss-tr-insertion-sort dss isa isad isan first last)
              (dss-ls-update-group dss isa first last))
             ((= (- last first) 1)
              (setf (aref (dss-sa dss) first) -1)))

           (when (stack-empty-p dss)
             (return-from dss-ls-intro-sort))
           (let ((entry (pop-stack dss)))
             (setf first (stack-entry-a entry))
             (setf last (stack-entry-b entry))
             (setf limit (stack-entry-c entry))
             (go ==next==)))

         (setf a 0)
         (setf b 0)
         (cond
           ((zerop limit)
            (decf limit)
            (dss-tr-heap-sort dss isa isad isan first (- last first))
            (setf a (1- last))
            (loop while (< first a) do
              (setf x (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) a)))
              (setf b (1- a))
              (loop while (and (<= first b)
                               (= (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) b)) x))
                    do (setf (aref (dss-sa dss) b) (lognot-i32 (aref (dss-sa dss) b)))
                       (decf b))
              (setf a b))

            (dss-ls-update-group dss isa first last)
            (when (stack-empty-p dss)
              (return-from dss-ls-intro-sort))
            (let ((entry (pop-stack dss)))
              (setf first (stack-entry-a entry))
              (setf last (stack-entry-b entry))
              (setf limit (stack-entry-c entry))
              (go ==next==)))
           (t
            (decf limit)))

         (setf a (dss-tr-pivot dss isa isad isan first last))
         (dss-swap (dss-sa dss) first (dss-sa dss) a)
         (setf v (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) first)))

         (setf b first)
         (loop while (and (< (incf b) last)
                          (= (setf x (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) b))) v)))

         (when (and (< (setf a b) last)
                    (< x v))
           (loop while (and (< (incf b) last)
                            (<= (setf x (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) b))) v))
                 do (when (= x v)
                      (dss-swap (dss-sa dss) b (dss-sa dss) a)
                      (incf a))))

         (setf c last)
         (loop while (and (< b (decf c))
                          (= (setf x (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) c))) v)))

         (when (and (< b (setf d c))
                    (> x v))
           (loop while (and (< b (decf c))
                            (>= (setf x (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) c))) v))
                 do (when (= x v)
                      (dss-swap (dss-sa dss) c (dss-sa dss) d)
                      (decf d))))

         (loop while (< b c) do
           (dss-swap (dss-sa dss) b (dss-sa dss) c)
           (loop while (and (< (incf b) c)
                            (<= (setf c (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) b))) v))
                 do (when (= x v)
                      (dss-swap (dss-sa dss) b (dss-sa dss) a)
                      (incf a)))

           (loop while (and (< b (decf c))
                            (>= (setf x (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) c))) v))
                 do (when (= x v)
                      (dss-swap (dss-sa dss) c (dss-sa dss) d)
                      (decf d))))

         (cond
           ((<= a d)
            (setf c (1- b))

            (when (> (setf s (- a first))
                     (setf tt (- b a)))
              (setf s tt))

            (setf e first)
            (setf f (- b s))
            (loop while (< 0 s) do
              (dss-swap (dss-sa dss) e (dss-sa dss) f)
              (decf s)
              (incf e)
              (incf f))

            (when (> (setf s (- d c))
                     (setf tt (- last d 1)))
              (setf s tt))

            (setf e b)
            (setf f (- last s))
            (loop while (< 0 s) do
              (dss-swap (dss-sa dss) e (dss-sa dss) f)
              (decf s)
              (incf e)
              (incf f))

            (setf a (+ first (- b a)))
            (setf b (- last (- d c)))

            (setf c first)
            (setf v (1- a))
            (loop while (< c a) do
              (setf (aref (dss-sa dss)
                          (+ isa (aref (dss-sa dss) c)))
                    v)
              (incf c))

            (when (< b last)
              (setf c a)
              (setf v (1- b))
              (loop while (< c b) do
                (setf (aref (dss-sa dss)
                            (+ isa (aref (dss-sa dss) c)))
                      v)
                (incf c)))

            (when (= (- b a) 1)
              (setf (aref (dss-sa dss) a) -1))

            (if (<= (- a first) (- last b))
                (cond
                  ((< first a)
                   (push-stack dss b last limit 0)
                   (setf last a))
                  (t
                   (setf first b)))
                (cond
                  ((< b last)
                   (push-stack dss first a limit 0)
                   (setf first b))
                  (t
                   (setf last a)))))

           (t
            (when (stack-empty-p dss)
              (return-from dss-ls-intro-sort))
            (let ((entry (pop-stack dss)))
              (setf first (stack-entry-a entry))
              (setf last (stack-entry-b entry))
              (setf limit (stack-entry-c entry)))))
       ==next==)))
  nil)

(define-typed-fn dss-ls-sort ((div-suf-sort dss) (t/int32 isa x depth))
    (null)
  ;;(declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (let ((isad (+ isa depth))
        (first 0)
        (skip 0)
        (last 0)
        (tt 0))
    (declare (type t/int32 isad first skip last tt))

    (loop while (< (- x) (aref (dss-sa dss) 0)) do
      (setf first 0)
      (setf skip 0)
      (setf last 0)
      (setf tt 0)

      (loop do
        (cond
          ((minusp (setf tt (aref (dss-sa dss) first)))
           (decf first tt)
           (incf skip tt))
          (t
           (unless (zerop skip)
             (setf (aref (dss-sa dss)
                         (+ first skip))
                   skip)
             (setf skip 0))
           (setf last (1+ (aref (dss-sa dss)
                                (+ isa tt))))
           (dss-ls-intro-sort dss isa isad (+ isa x) first last)
           (setf first last)))
        (break-unless (< first x)))

      (unless (zerop skip)
        (setf (aref (dss-sa dss) (+ first skip)) skip))

      (when (< x (- isad isa))
        (setf first 0)
        (loop do
          (cond
            ((minusp (setf tt (aref (dss-sa dss) first)))
             (decf first tt))
            (t
             (setf last (1+ (aref (dss-sa dss) (+ isa tt))))
             (loop for i fixnum from first below last do
               (setf (aref (dss-sa dss)
                           (+ isa (aref (dss-sa dss) i)))
                     i))
             (setf first last)))
          (break-unless (< first x)))
        (loop-finish))

      (incf isad (- isad isa))))
  nil)

(define-typed-fn dss-tr-partition ((div-suf-sort dss) (t/int32 isa isad isan the-first the-last v))
    (partition-result)
  ;;(declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (let* ((first the-first)
         (last the-last)
         (a 0)
         (c 0)
         (d 0)
         (tt 0)
         (s 0)
         (e 0)
         (f 0)
         (x 0)
         (b (1- first)))
    (declare (type t/int32 a c d tt s e f x b first last))
    (loop while (and (< (incf b) last)
                     (= (setf x (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) b))) v)))

    (when (and (< (setf a b) last)
               (< x v))
      (loop while (and (< (incf b) last)
                       (<= (setf x (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) b))) v))
            do (when (= x v)
                 (dss-swap (dss-sa dss) b (dss-sa dss) a)
                 (incf a))))

    (setf c last)
    (loop while (and (< b (decf c))
                     (= (setf x (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) c))) v)))

    (when (and (< b (setf d c))
               (> x v))
      (loop while (and (< b (decf c))
                       (>= (setf x (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) c))) v))
            do (when (= x v)
                 (dss-swap (dss-sa dss) c (dss-sa dss) d)
                 (decf d))))

    (loop while (< b c) do
      (dss-swap (dss-sa dss) b (dss-sa dss) c)
      (loop while (and (< (incf b) c)
                       (<= (setf x (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) b))) v))
            do (when (= x v)
                 (dss-swap (dss-sa dss) b (dss-sa dss) a)
                 (incf a)))

      (loop while (and (< b (decf c))
                       (>= (setf x (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) c))) v))
            do (when (= x v)
                 (dss-swap (dss-sa dss) c (dss-sa dss) d)
                 (decf d))))

    (when (<= a d)
      (setf c (1- b))

      (when (> (setf s (- a first))
               (setf tt (- b a)))
        (setf s tt))

      (setf e first)
      (setf f (- b s))
      (loop while (< 0 s) do
        (dss-swap (dss-sa dss) e (dss-sa dss) f)
        (decf s)
        (incf e)
        (incf f))

      (when (> (setf s (- d c))
               (setf tt (- last d 1)))
        (setf s tt))

      (setf e b)
      (setf f (- last s))
      (loop while (< 0 s) do
        (dss-swap (dss-sa dss) e (dss-sa dss) f)
        (decf s)
        (incf e)
        (incf f))

      (incf first (- b a))
      (decf last (- d c)))

    (make-partition-result :first first :last last)))

(define-typed-fn dss-tr-copy ((div-suf-sort dss) (t/int32 isa isan first a b last depth))
    (null)
  ;;(declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (let ((c first)
        (d (1- a))
        (e 0)
        (s 0)
        (v (1- b)))
    (declare (type t/int32 c d e s v))

    (loop while (<= c d) do
      (when (minusp (setf s (- (aref (dss-sa dss) c) depth)))
        (incf s (- isan isa)))

      (when (= (aref (dss-sa dss) (+ isa s)) v)
        (incf d)
        (setf (aref (dss-sa dss) d) s)
        (setf (aref (dss-sa dss) (+ isa s)) d))

      (incf c))

    (setf c (1- last))
    (setf e (1+ d))
    (setf d b)
    (loop while (< e d) do
      (when (minusp (setf s (- (aref (dss-sa dss) c) depth)))
        (incf s (- isan isa)))

      (when (= (aref (dss-sa dss) (+ isa s)) v)
        (setf (aref (dss-sa dss) (decf d)) s)
        (setf (aref (dss-sa dss) (+ isa s)) d))

      (decf c)))
  nil)

(define-typed-fn dss-tr-intro-sort ((div-suf-sort dss) (t/int32 isa the-isad isan the-first the-last)
                                    (tr-budget budget) (t/int32 size))
    (null)
  ;;(declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (make-stack dss)
  (let* ((isad the-isad)
         (first the-first)
         (last the-last)
         (s 0)
         (x 0)
         (a 0)
         (b 0)
         (c 0)
         (v 0)
         (d 0)
         (tt 0)
         (e 0)
         (f 0)
         (nxt 0)
         (limit (dss-tr-log (- last first))))
    (declare (type t/int32 s x a b c v d tt e f nxt limit isad first last))

    (loop do
      (tagbody
         (setf a 0)
         (setf b 0)
         (setf c 0)
         (setf v 0)
         (setf nxt 0)

         (when (< limit 0)
           (cond
             ((= limit -1)
              (break-unless (tr-budget-update budget size (- last first)))
              (let ((result (dss-tr-partition dss isa (1- isad) isan first last (1- last))))
                (setf a (partition-result-first result))
                (setf b (partition-result-last result))

                (cond
                  ((or (< first a) (< b last))
                   (when (< a last)
                     (setf c first)
                     (setf v (1- a))
                     (loop while (< c a) do
                       (setf (aref (dss-sa dss) (+ isa (aref (dss-sa dss) c))) v)
                       (incf c)))

                   (when (< b last)
                     (setf c a)
                     (setf v (1- b))
                     (loop while (< c b) do
                       (setf (aref (dss-sa dss) (+ isa (aref (dss-sa dss) c))) v)
                       (incf c)))

                   (push-stack dss 0 a b 0)
                   (push-stack dss (1- isad) first last -2)

                   (cond
                     ((<= (- a first) (- last b))
                      (cond
                        ((< 1 (- a first))
                         (push-stack dss isad b last (dss-tr-log (- last b)))
                         (setf last a)
                         (setf limit (dss-tr-log (- a first))))
                        ((< 1 (- last b))
                         (setf first b)
                         (setf limit (dss-tr-log (- last b))))
                        (t
                         (when (stack-empty-p dss)
                           (return-from dss-tr-intro-sort))
                         (let ((entry (pop-stack dss)))
                           (setf isad (stack-entry-a entry))
                           (setf first (stack-entry-b entry))
                           (setf last (stack-entry-c entry))
                           (setf limit (stack-entry-d entry))))))

                     (t
                      (cond
                        ((< 1 (- last b))
                         (push-stack dss isad first a (dss-tr-log (- a first)))
                         (setf first b)
                         (setf limit (dss-tr-log (- last b))))
                        ((< 1 (- a first))
                         (setf last a)
                         (setf limit (dss-tr-log (- a first))))
                        (t
                         (when (stack-empty-p dss)
                           (return-from dss-tr-intro-sort))
                         (let ((entry (pop-stack dss)))
                           (setf isad (stack-entry-a entry))
                           (setf first (stack-entry-b entry))
                           (setf last (stack-entry-c entry))
                           (setf limit (stack-entry-d entry))))))))

                  (t
                   (setf c first)
                   (loop while (< c last) do
                     (setf (aref (dss-sa dss) (+ isa (aref (dss-sa dss) c))) c)
                     (incf c))

                   (when (stack-empty-p dss)
                     (return-from dss-tr-intro-sort))
                   (let ((entry (pop-stack dss)))
                     (setf isad (stack-entry-a entry))
                     (setf first (stack-entry-b entry))
                     (setf last (stack-entry-c entry))
                     (setf limit (stack-entry-d entry)))))))

             ((= limit -2)
              (let ((entry (pop-stack dss)))
                (setf a (stack-entry-b entry))
                (setf b (stack-entry-c entry))
                (dss-tr-copy dss isa isan first a b last (- isad isa))

                (when (stack-empty-p dss)
                  (return-from dss-tr-intro-sort))
                (let ((entry (pop-stack dss)))
                  (setf isad (stack-entry-a entry))
                  (setf first (stack-entry-b entry))
                  (setf last (stack-entry-c entry))
                  (setf limit (stack-entry-d entry)))))

             (t
              (when (<= 0 (aref (dss-sa dss) first))
                (setf a first)
                (loop do
                  (setf (aref (dss-sa dss)
                              (+ isa (aref (dss-sa dss) a)))
                        a)
                  (break-unless (and (< (incf a) last)
                                     (<= 0 (aref (dss-sa dss) a))))
                  (setf first a)))

              (cond
                ((< first last)
                 (setf a first)
                 (loop do
                   (setf (aref (dss-sa dss) a) (lognot-i32 (aref (dss-sa dss) a)))
                   (break-unless (minusp (aref (dss-sa dss) (incf a)))))

                 (setf nxt (if (/= (aref (dss-sa dss) (+ isa  (aref (dss-sa dss) a)))
                                   (aref (dss-sa dss) (+ isad (aref (dss-sa dss) a))))
                               (dss-tr-log (1+ (- a first)))
                               -1))

                 (when (< (incf a) last)
                   (setf b first)
                   (setf v (1- a))
                   (loop while (< b a) do
                     (setf (aref (dss-sa dss) (+ isa (aref (dss-sa dss) b))) v)
                     (incf b)))

                 (cond
                   ((<= (- a first) (- last a))
                    (push-stack dss isad a last -3)
                    (incf isad)
                    (setf last a)
                    (setf limit nxt))
                   (t
                    (cond
                      ((< 1 (- last a))
                       (push-stack dss (1+ isad) first a nxt)
                       (setf first a)
                       (setf limit -3))
                      (t
                       (incf isad)
                       (setf last a)
                       (setf limit nxt))))))

                (t
                 (when (stack-empty-p dss)
                   (return-from dss-tr-intro-sort))
                 (let ((entry (pop-stack dss)))
                   (setf isad (stack-entry-a entry))
                   (setf first (stack-entry-b entry))
                   (setf last (stack-entry-c entry))
                   (setf limit (stack-entry-d entry)))))))
           (go ==next==))

         (when (<= (- last first) +insertion-sort-threshold+)
           (break-unless (tr-budget-update budget size (- last first)))
           (dss-tr-insertion-sort dss isa isad isan first last)
           (setf limit -3)
           (go ==next==))

         (cond
           ((zerop limit)
            (decf limit)
            (break-unless (tr-budget-update budget size (- last first)))
            (dss-tr-heap-sort dss isa isad isan first (- last first))

            (setf a (1- last))
            (loop while (< first a) do
              (setf x (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) a)))
              (setf b (1- a))

              (loop while (and (<= first b)
                               (= (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) b)) x))
                    do (setf (aref (dss-sa dss) b) (lognot-i32 (aref (dss-sa dss) b)))
                       (decf b))

              (setf a b))

            (setf limit -3)
            (go ==next==))

           (t
            (decf limit)))

         (setf a (dss-tr-pivot dss isa isad isan first last))

         (dss-swap (dss-sa dss) first (dss-sa dss) a)
         (setf v (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) first)))
         (setf b first)
         (loop while (and (< (incf b) last)
                          (= (setf x (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) b))) v)))

         (when (and (< (setf a b) last)
                    (< x v))
           (loop while (and (< (incf b) last)
                            (<= (setf x (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) b))) v))
                 do (when (= x v)
                      (dss-swap (dss-sa dss) b (dss-sa dss) a)
                      (incf a))))

         (setf c last)
         (loop while (and (< b (decf c))
                          (= (setf x (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) c))) v)))

         (when (and (< b (setf d c))
                    (> x v))
           (loop while (and (< b (decf c))
                            (>= (setf x (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) c))) v))
                 do (when (= x v)
                      (dss-swap (dss-sa dss) c (dss-sa dss) d)
                      (decf d))))

         (loop while (< b c) do
           (dss-swap (dss-sa dss) b (dss-sa dss) c)
           (loop while (and (< (incf b) c)
                            (<= (setf x (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) b))) v))
                 do (when (= x v)
                      (dss-swap (dss-sa dss) b (dss-sa dss) a)
                      (incf a)))

           (loop while (and (< b (decf c))
                            (>= (setf x (dss-tr-get-c dss isa isad isan (aref (dss-sa dss) c))) v))
                 do (when (= x v)
                      (dss-swap (dss-sa dss) c (dss-sa dss) d)
                      (decf d))))

         (cond
           ((<= a d)
            (setf c (1- b))

            (when (> (setf s (- a first))
                     (setf tt (- b a)))
              (setf s tt))

            (setf e first)
            (setf f (- b s))
            (loop while (< 0 s) do
              (dss-swap (dss-sa dss) e (dss-sa dss) f)
              (decf s)
              (incf e)
              (incf f))

            (when (> (setf s (- d c))
                     (setf tt (- last d 1)))
              (setf s tt))

            (setf e b)
            (setf f (- last s))
            (loop while (< 0 s) do
              (dss-swap (dss-sa dss) e (dss-sa dss) f)
              (decf s)
              (incf e)
              (incf f))

            (setf a (+ first (- b a)))
            (setf b (- last (- d c)))
            (setf nxt (if (/= (aref (dss-sa dss) (+ isa (aref (dss-sa dss) a))) v)
                          (dss-tr-log (- b a))
                          -1))

            (setf c first)
            (setf v (1- a))
            (loop while (< c a) do
              (setf (aref (dss-sa dss) (+ isa (aref (dss-sa dss) c))) v)
              (incf c))

            (when (< b last)
              (setf c a)
              (setf v (1- b))
              (loop while (< c b) do
                (setf (aref (dss-sa dss) (+ isa (aref (dss-sa dss) c))) v)
                (incf c)))

            (cond
              ((<= (- a first) (- last b))
               (cond
                 ((<= (- last b) (- b a))
                  (cond
                    ((< 1 (- a first))
                     (push-stack dss (1+ isad) a b nxt)
                     (push-stack dss isad b last limit)
                     (setf last a))
                    ((< 1 (- last b))
                     (push-stack dss (1+ isad) a b nxt)
                     (setf first b))
                    ((< 1 (- b a))
                     (incf isad)
                     (setf first a)
                     (setf last b)
                     (setf limit nxt))
                    (t
                     (when (stack-empty-p dss)
                       (return-from dss-tr-intro-sort))
                     (let ((entry (pop-stack dss)))
                       (setf isad (stack-entry-a entry))
                       (setf first (stack-entry-b entry))
                       (setf last (stack-entry-c entry))
                       (setf limit (stack-entry-d entry))))))

                 ((<= (- a first) (- b a))
                  (cond
                    ((< 1 (- a first))
                     (push-stack dss isad b last limit)
                     (push-stack dss (1+ isad) a b nxt)
                     (setf last a))
                    ((< 1 (- b a))
                     (push-stack dss isad b last limit)
                     (incf isad)
                     (setf first a)
                     (setf last b)
                     (setf limit nxt))
                    (t
                     (setf first b))))

                 (t
                  (cond
                    ((< 1 (- b a))
                     (push-stack dss isad b last limit)
                     (push-stack dss isad first a limit)
                     (incf isad)
                     (setf first a)
                     (setf last b)
                     (setf limit nxt))
                    (t
                     (push-stack dss isad b last limit)
                     (setf last a))))))

              (t
               (cond
                 ((<= (- a first) (- b a))
                  (cond
                    ((< 1 (- last b))
                     (push-stack dss (1+ isad) a b nxt)
                     (push-stack dss isad first a limit)
                     (setf first b))
                    ((< 1 (- a first))
                     (push-stack dss (1+ isad) a b nxt)
                     (setf last a))
                    ((< 1 (- b a))
                     (incf isad)
                     (setf first a)
                     (setf last b)
                     (setf limit nxt))
                    (t
                     (push-stack dss isad first last limit))))

                 ((<= (- last b) (- b a))
                  (cond
                    ((< 1 (- last b))
                     (push-stack dss isad first a limit)
                     (push-stack dss (1+ isad) a b nxt)
                     (setf first b))
                    ((< 1 (- b a))
                     (push-stack dss isad first a limit)
                     (incf isad)
                     (setf first a)
                     (setf last b)
                     (setf limit nxt))
                    (t
                     (setf last a))))

                 (t
                  (cond
                    ((< 1 (- b a))
                     (push-stack dss isad first a limit)
                     (push-stack dss isad b last limit)
                     (incf isad)
                     (setf first a)
                     (setf last b)
                     (setf limit nxt))
                    (t
                     (push-stack dss isad first a limit)
                     (setf first b))))))))
           (t
            (break-unless (tr-budget-update budget size (- last first)))
            (incf limit)
            (incf isad)))
       ==next==))

    (dotimes (z (dss-ssize dss))
      (when (= (stack-entry-d (aref (dss-stack dss) z)) -3)
        (dss-ls-update-group dss
                             isa
                             (stack-entry-b (aref (dss-stack dss) z))
                             (stack-entry-c (aref (dss-stack dss) z))))))
  nil)

(define-typed-fn dss-tr-sort ((div-suf-sort dss) (t/int32 isa x depth))
    (null)
  ;;(declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (let ((first 0))
    (declare (type t/int32 first))
    (when (< (- x) (aref (dss-sa dss) 0))
      (let ((budget (make-tr-budget :budget x
                                    :chance (coerce-to-int32
                                             (truncate
                                              (muffling
                                                (1+ (/ (* (dss-tr-log x)
                                                          2)
                                                       3)))))))
            (tt 0)
            (last 0))
        (declare (type tr-budget budget)
                 (type t/int32 tt last))

        (loop do
          (cond
            ((< (setf tt (aref (dss-sa dss) first)) 0)
             (decf first tt))
            (t
             ;; Bad line
             (setf last (1+ (aref (dss-sa dss) (+ isa tt))))
             (when (< 1 (- last first))
               (dss-tr-intro-sort dss isa (+ isa depth) (+ isa x) first last budget x)
               (print budget)
               (when (zerop (tr-budget-chance budget))
                 ;; Switch to Larsson-Sadakane sorting algorithm.
                 (when (< 0 first)
                   (setf (aref (dss-sa dss) 0) (- first)))
                 (dss-ls-sort dss isa x depth)
                 (loop-finish)))
             (setf first last)))

          (break-unless (< first x))))))
  nil)

(define-typed-fn dss-sort-type-bstar ((div-suf-sort dss)
                                      (t/int32-array bucket-a bucket-b))
    (t/int32)
  ;;(declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (with-rented-array (temp-buf 256 (dss-int32-pool dss))
    (fill temp-buf 0)
    (let ((j 0)
          (k 0)
          (tt 0)
          (c0 0)
          (c1 0)
          (i 1)
          (m 0)
          (ti 0)
          (t0 0)
          (ti1 0)
          (pab 0)
          (isab 0)
          (flag 1))
      (declare (type t/int32 j k tt c0 c1 i flag m ti t0 ti1 pab isab))
      (loop while (< i (dss-n dss)) do
        (when (/= (aref (dss-bytes dss) (1- i))
                  (aref (dss-bytes dss) i))
          (when (> (aref (dss-bytes dss) (1- i))
                   (aref (dss-bytes dss) i))
            (setf flag 0))
          (loop-finish))
        (incf i))

      (setf i (1- (dss-n dss)))
      (setf m (dss-n dss))
      (setf ti (aref (dss-bytes dss) i))
      (setf t0 (aref (dss-bytes dss) 0))
      (when (or (< ti t0)
                (and (= (aref (dss-bytes dss) i)
                        (aref (dss-bytes dss) 0))
                     (not (zerop flag))))
        (cond
          ((zerop flag)
           (incf (aref bucket-b (bucket-b-star ti t0)))
           (setf (aref (dss-sa dss) (decf m)) i))
          (t
           (incf (aref bucket-b (bucket-b ti t0)))))

        (decf i)
        (loop while (and (<= 0 i)
                         (<= (setf ti (aref (dss-bytes dss) i))
                             (setf ti1 (aref (dss-bytes dss) (1+ i)))))
              do (incf (aref bucket-b (bucket-b ti ti1)))
                 (decf i)))

      (loop while (<= 0 i) do
        (loop do
          (incf (aref bucket-a (aref (dss-bytes dss) i)))
          (break-unless (and (<= 0 (decf i))
                             (>= (aref (dss-bytes dss) i)
                                 (aref (dss-bytes dss) (1+ i))))))

        (when (<= 0 i)
          (incf (aref bucket-b (bucket-b-star (aref (dss-bytes dss) i)
                                              (aref (dss-bytes dss) (1+ i)))))
          (setf (aref (dss-sa dss) (decf m)) i)

          (decf i)
          (loop while (and (<= 0 i)
                           (<= (setf ti (aref (dss-bytes dss) i))
                               (setf ti1 (aref (dss-bytes dss) (1+ i)))))
                do (incf (aref bucket-b (bucket-b ti ti1)))
                   (decf i))))

      (setf m (- (dss-n dss) m))
      (when (zerop m)
        (dotimes (z (dss-n dss))
          (setf (aref (dss-sa dss) z) z))
        (return-from dss-sort-type-bstar 0))

      (setf c0 0)
      (setf i -1)
      (setf j 0)
      (loop while (< c0 256) do
        (setf tt (+ i (aref bucket-a c0)))
        (setf (aref bucket-a c0) (+ i j))
        (setf i (+ tt (aref bucket-b (bucket-b c0 c0))))

        (setf c1 (1+ c0))
        (loop while (< c1 256) do
          (incf j (aref bucket-b (bucket-b-star c0 c1)))
          (setf (aref bucket-b (logior (coerce-to-int32 (ash c0 8)) c1)) j)
          (incf i (aref bucket-b (bucket-b c0 c1)))
          (incf c1))

        (incf c0))

      (setf pab (- (dss-n dss) m))
      (setf isab m)
      (setf i (- m 2))
      (loop while (<= 0 i) do
        (setf tt (aref (dss-sa dss) (+ pab i)))
        (setf c0 (aref (dss-bytes dss) tt))
        (setf c1 (aref (dss-bytes dss) (1+ tt)))
        (setf (aref (dss-sa dss)
                    (decf (aref bucket-b (bucket-b-star c0 c1))))
              i)
        (decf i))

      (setf tt (aref (dss-sa dss) (1- (+ pab m))))
      (setf c0 (aref (dss-bytes dss) tt))
      (setf c1 (aref (dss-bytes dss) (1+ tt)))
      (setf (aref (dss-sa dss)
                  (decf (aref bucket-b (bucket-b-star c0 c1))))
            (1- m))

      (let ((buf (dss-sa dss))
            (buf-offset m)
            (buf-size (- (dss-n dss) (* 2 m))))
        (declare (type t/int32-array buf)
                 (type t/int32 buf-offset buf-size))
        (when (<= buf-size 256)
          (setf buf temp-buf)
          (setf buf-offset 0)
          (setf buf-size 256))

        (setf c0 255)
        (setf j m)
        (loop while (< 0 j) do
          (setf c1 255)
          (loop while (< c0 c1) do
            (setf i (aref bucket-b (bucket-b-star c0 c1)))
            (when (< 1 (- j i))
              (dss-sub-string-sort dss pab i j buf buf-offset buf-size 2
                                   (= (aref (dss-sa dss) i)
                                      (1- m))
                                   (dss-n dss)))

            (setf j i)
            (decf c1))

          (decf c0))

        (setf i (1- m))
        (loop while (<= 0 i) do
          (when (<= 0 (aref (dss-sa dss) i))
            (setf j i)
            (loop do
              (setf (aref (dss-sa dss) (+ isab (aref (dss-sa dss) i))) i)
              (break-unless (and (<= 0 (decf i))
                                 (<= 0 (aref (dss-sa dss) i)))))

            (setf (aref (dss-sa dss) (1+ i)) (- i j))
            (break-when (<= i 0)))

          (setf j i)
          (loop do
            (setf (aref (dss-sa dss) (+ isab (setf (aref (dss-sa dss) i)
                                                   (lognot-i32 (aref (dss-sa dss) i)))))
                  j)
            (break-unless (< (aref (dss-sa dss) (decf i)) 0)))

          (setf (aref (dss-sa dss) (+ isab (aref (dss-sa dss) i))) j)
          (decf i))

        (dss-tr-sort dss isab m 1)

        (setf i (1- (dss-n dss)))
        (setf j m)
        (when (or (< (aref (dss-bytes dss) i)
                     (aref (dss-bytes dss) 0))
                  (and (= (aref (dss-bytes dss) i)
                          (aref (dss-bytes dss) 0))
                       (not (zerop flag))))
          (when (zerop flag)
            (setf (aref (dss-sa dss)
                        (aref (dss-sa dss) (+ isab (decf j))))
                  i))

          (decf i)
          (loop while (and (<= 0 i)
                           (<= (aref (dss-bytes dss) i)
                               (aref (dss-bytes dss) (1+ i))))
                do (decf i)))

        (loop while (<= 0 i) do
          (decf i)
          (loop while (and (<= 0 i)
                           (>= (aref (dss-bytes dss) i)
                               (aref (dss-bytes dss) (1+ i))))
                do (decf i))

          (when (<= 0 i)
            (setf (aref (dss-sa dss)
                        (aref (dss-sa dss)
                              (+ isab (decf j))))
                  i)

            (decf i)
            (loop while (and (<= 0 i)
                             (<= (aref (dss-bytes dss) i)
                                 (aref (dss-bytes dss) (1+ i))))
                  do (decf i))))

        (setf c0 255)
        (setf i (1- (dss-n dss)))
        (setf k (1- m))
        (loop while (<= 0 c0) do
          (setf c1 255)
          (loop while (< c0 c1) do
            (setf tt (- i (aref bucket-b (bucket-b c0 c1))))
            (setf (aref bucket-b (bucket-b c0 c1)) (1+ i))

            (setf i tt)
            (setf j (aref bucket-b (bucket-b-star c0 c1)))
            (loop while (<= j k) do
              ;; bad line
              (setf (aref (dss-sa dss) i) (aref (dss-sa dss) k))
              (decf i)
              (decf k))

            (decf c1))
          (setf tt (- i (aref bucket-b (bucket-b c0 c0))))
          (setf (aref bucket-b (bucket-b c0 c0)) (1+ i))
          (when (< c0 255)
            (setf (aref bucket-b (bucket-b-star c0 (1+ c0)))
                  (1+ tt)))
          (setf i (aref bucket-a c0))

          (decf c0)))
      m)))

(define-typed-fn dss-construct-bwt ((div-suf-sort dss)
                                    (t/int32-array bucket-a bucket-b))
    (t/int32)
  ;;(declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (let ((tt 0)
        (s 0)
        (s1 0)
        (c0 0)
        (c2 0)
        (orig -1)
        (j 0)
        (c1 254))
    (declare (type t/int32 tt s s1 c0 c2 orig j c1))
    (loop with i fixnum = 0
          while (<= 0 c1)
          do (setf i (aref bucket-b (bucket-b-star c1 (1+ c1))))
             (setf j (aref bucket-a (1+ c1)))
             (setf tt 0)
             (setf c2 -1)

             (loop while (<= i j) do
               (setf s (aref (dss-sa dss) j))
               (setf s1 s)
               (cond
                 ((<= 0 s1)
                  (when (< (decf s) 0)
                    (setf s (1- (dss-n dss))))

                  (setf c0 (aref (dss-bytes dss) s))
                  (when (<= c0 c1)
                    (setf (aref (dss-sa dss) j) (lognot-i32 s1))
                    (when (and (< 0 s)
                               (> (aref (dss-bytes dss) (1- s)) c0))
                      (setf s (lognot-i32 s)))

                    (cond
                      ((= c2 c0)
                       (setf (aref (dss-sa dss) (decf tt)) s))
                      (t
                       (when (<= 0 c2)
                         (setf (aref bucket-b (bucket-b c2 c1)) tt))

                       (setf c2 c0)
                       (setf tt (1- (aref bucket-b (bucket-b c2 c1))))
                       (setf (aref (dss-sa dss) tt) s)))))
                 (t
                  (setf (aref (dss-sa dss) j) (lognot-i32 s))))
               (decf j))
             (decf c1))

    (loop for i fixnum from 0 below (dss-n dss) do
      (setf s (aref (dss-sa dss) i))
      (setf s1 s)

      (cond
        ((<= 0 s1)
         (when (< (decf s) 0)
           (setf s (1- (dss-n dss))))

         (when (>= (setf c0 (aref (dss-bytes dss) s))
                   (aref (dss-bytes dss) (1+ s)))
           (when (and (< 0 s)
                      (< (aref (dss-bytes dss) (1- s)) c0))
             (setf s (lognot-i32 s)))

           (cond
             ((= c0 c2)
              (setf (aref (dss-sa dss) (incf tt)) s))
             (t
              (when (/= c2 -1) ;; BUGFIX: Original code can write to bucketA[-1]
                (setf (aref bucket-a c2) tt))

              (setf c2 c0)
              (setf tt (1+ (aref bucket-a c2)))
              (setf (aref (dss-sa dss) tt) s)))))
        (t
         (setf s1 (lognot-i32 s1))))

      (cond
        ((zerop s1)
         (setf (aref (dss-sa dss) i) (aref (dss-bytes dss) (1- (dss-n dss))))
         (setf orig i))
        (t
         (setf (aref (dss-sa dss) i) (aref (dss-bytes dss)
                                           (aref (dss-bytes dss) (1- s1)))))))
    orig))

(define-typed-fn dss-bwt ((div-suf-sort dss))
    (t/int32)
  ;;(declare (optimize speed (debug 1)))
  (declare (optimize speed (debug 1) (safety 1)))
  (case (dss-n dss)
    (0
     (return-from dss-bwt 0))
    (1
     (setf (aref (dss-sa dss) 0)
           (aref (dss-bytes dss) 0))
     (return-from dss-bwt 0)))

  (with-rented-array (bucket-a +bucket-a-size+ (dss-int32-pool dss))
    (with-rented-array (bucket-b +bucket-b-size+ (dss-int32-pool dss))
      (fill bucket-a 0)
      (fill bucket-b 0)

      (let ((m (dss-sort-type-bstar dss bucket-a bucket-b)))
        (if (< 0 m)
            (dss-construct-bwt dss bucket-a bucket-b)
            0)))))
