Login
Artifact [73e5237742]
Login

Artifact 73e5237742fd71847d81b0f4dbb29acd8f43e54d933496d6143ed50d6e6bc448:


#### Bzip2 Implementation
#### Copyright (C) 2023-2024 Remilia Scarlet
#### Copyright (C) 2015 Jaime Olivares
#### Copyright (c) 2011 Matthew Francis
#### MIT License
####
#### Ported from the Java implementation by Matthew Francis:
#### https://github.com/MateuszBartosiewicz/bzip2.
####
#### Ported by Remilia Scarlet from the C# implementation by Jamie Olivares:
#### http://github.com/jaime-olivares/bzip2

module RemiLib::Compression::BZip2
  # An in-place, length restricted Canonical Huffman code length allocator.
  #
  # Based on the algorithm proposed by R.L.Milidiú, A.A.Pessoa and E.S.Laber in
  # ["In-place Length-Restricted Prefix
  # Coding"](http://www-di.inf.puc-rio.br/~laber/public/spire98.ps) and
  # incorporating additional ideas from the implementation of
  # ["shcodec"](http://webcenter.ru/~xander/) by Simakov Alexander.
  private module HuffmanAllocator
    extend self

    # Allocates Canonical Huffman code lengths in place based on a sorted
    # frequency array.
    #
    # `arr` should be a sorted array of symbol frequencies.  These will be
    # modified in-place into an array of canonical Huffman code lengths.
    #
    # `maxLength` is the maximum code length.  This must be at least
    # `Math.ceil(Math.log2(arr.size))`.
    def allocateHuffmanCodeLengths(arr : Array(Int32), maxLength : Int) : Nil
      case arr.size
      when 2
        arr[1] = 1
      when 1
        arr[0] = 1
        return
      end

      setExtendedParentPointers(arr)

      nodesToRelocate : Int32 = findNotesToRelocate(arr, maxLength)

      if (arr[0] % arr.size) >= nodesToRelocate
        allocateNodeLengths(arr)
      else
        insertDepth : Int32 = maxLength - significantBits(nodesToRelocate - 1)
        allocateNodeLengthsWithRelocation(arr, nodesToRelocate, insertDepth)
      end
    end

    private def first(array : Array(Int32), i : Int32, nodesToMove : Int32) : Int32
      len : Int32 = array.size
      limit : Int32 = i
      k : Int32 = array.size - 2

      while (i >= nodesToMove) && (array[i] % len) > limit
        k = i
        i -= (limit - i + 1)
      end
      i = Math.max(nodesToMove - 1, i)

      tmp : Int32 = 0
      while k > (i + 1)
        tmp = (i + k) >> 1
        if (array[tmp] % len) > limit
          k = tmp
        else
          i = tmp
        end
      end

      k
    end

    # Fills the code array with extended parent pointers.
    private def setExtendedParentPointers(array : Array(Int32)) : Nil
      len : Int32 = array.size
      array[0] += array[1]

      headNode : Int32 = 0
      tailNode : Int32 = 1
      topNode : Int32 = 2
      tmp : Int32 = 0

      while tailNode < (len - 1)
        tmp = 0
        if topNode >= len || array[headNode] < array[topNode]
          tmp = array[headNode]
          array[headNode] = tailNode
          headNode += 1
        else
          tmp = array[topNode]
          topNode += 1
        end

        if topNode >= len || (headNode < tailNode && array[headNode] < array[topNode])
          tmp += array[headNode]
          array[headNode] = tailNode + len
          headNode += 1
        else
          tmp += array[topNode]
          topNode += 1
        end

        array[tailNode] = tmp
        tailNode += 1
      end
    end

    # Finds the number of nodes to relocate in order to achieve a given code length limit.
    private def findNotesToRelocate(array : Array(Int32), maxLength : Int32) : Int32
      curNode : Int32 = array.size - 2
      curDepth : Int32 = 1
      lastNode : Int32 = 0

      while curDepth < (maxLength - 1) && curNode > 1
        curNode = first(array, curNode - 1, 0)
        curDepth += 1
      end

      curNode
    end

    # A final allocation pass with no code length limit.
    private def allocateNodeLengths(array : Array(Int32)) : Nil
      firstNode : Int32 = array.size - 2
      nextNode : Int32 = array.size - 1
      currentDepth : Int32 = 1
      availableNodes : Int32 = 2
      lastNode : Int32 = 0

      while availableNodes > 0
        lastNode = firstNode
        firstNode = first(array, lastNode - 1, 0)

        (availableNodes - (lastNode - firstNode)).downto(1) do |_|
          array[nextNode] = currentDepth
          nextNode -= 1
        end

        availableNodes = (lastNode - firstNode) << 1
        currentDepth += 1
      end
    end

    # A final allocation pass that relocates nodes in order to achieve a maximum
    # code length limit.
    private def allocateNodeLengthsWithRelocation(array : Array(Int32),
                                                  nodesToMove : Int32, insertDepth : Int32) : Nil
      firstNode : Int32 = array.size - 2
      nextNode : Int32 = array.size - 1
      currentDepth : Int32 = (insertDepth == 1 ? 2 : 1)
      nodesLeftToMove = (insertDepth == 1 ? nodesToMove - 2 : nodesToMove)
      availableNodes : Int32 = currentDepth << 1
      lastNode : Int32 = 0
      offset : Int32 = 0

      while availableNodes > 0
        lastNode = firstNode
        firstNode = if firstNode <= nodesToMove
                      firstNode
                    else
                      first(array, lastNode - 1, nodesToMove)
                    end

        offset = 0
        if currentDepth >= insertDepth
          offset = Math.min(nodesLeftToMove, 1 << (currentDepth - insertDepth))
        elsif currentDepth == insertDepth - 1
          offset = 1
          firstNode += 1 if array[firstNode] == lastNode
        end

        (availableNodes - (lastNode - firstNode + offset)).downto(1) do |_|
          array[nextNode] = currentDepth
          nextNode -= 1
        end

        nodesLeftToMove -= offset
        availableNodes = (lastNode - firstNode + offset) << 1
        currentDepth += 1
      end
    end

    private def significantBits(x : Int32) : Int32
      n : Int32 = 0
      while x > 0
        x >>= 1
        n += 1
      end
      n
    end
  end
end