#### Bzip2 Implementation
#### Copyright (C) 2023-2024 Remilia Scarlet
#### Copyright (C) 2015 Jaime Olivares
#### Copyright (c) 2011 Matthew Francis
#### MIT License
####
#### Ported from the Java implementation by Matthew Francis:
#### https://github.com/MateuszBartosiewicz/bzip2.
####
#### Ported by Remilia Scarlet from the C# implementation by Jamie Olivares:
#### http://github.com/jaime-olivares/bzip2
module RemiLib::Compression::BZip2
# An in-place, length restricted Canonical Huffman code length allocator.
#
# Based on the algorithm proposed by R.L.Milidiú, A.A.Pessoa and E.S.Laber in
# ["In-place Length-Restricted Prefix
# Coding"](http://www-di.inf.puc-rio.br/~laber/public/spire98.ps) and
# incorporating additional ideas from the implementation of
# ["shcodec"](http://webcenter.ru/~xander/) by Simakov Alexander.
private module HuffmanAllocator
extend self
# Allocates Canonical Huffman code lengths in place based on a sorted
# frequency array.
#
# `arr` should be a sorted array of symbol frequencies. These will be
# modified in-place into an array of canonical Huffman code lengths.
#
# `maxLength` is the maximum code length. This must be at least
# `Math.ceil(Math.log2(arr.size))`.
def allocateHuffmanCodeLengths(arr : Array(Int32), maxLength : Int) : Nil
case arr.size
when 2
arr[1] = 1
when 1
arr[0] = 1
return
end
setExtendedParentPointers(arr)
nodesToRelocate : Int32 = findNotesToRelocate(arr, maxLength)
if (arr[0] % arr.size) >= nodesToRelocate
allocateNodeLengths(arr)
else
insertDepth : Int32 = maxLength - significantBits(nodesToRelocate - 1)
allocateNodeLengthsWithRelocation(arr, nodesToRelocate, insertDepth)
end
end
private def first(array : Array(Int32), i : Int32, nodesToMove : Int32) : Int32
len : Int32 = array.size
limit : Int32 = i
k : Int32 = array.size - 2
while (i >= nodesToMove) && (array[i] % len) > limit
k = i
i -= (limit - i + 1)
end
i = Math.max(nodesToMove - 1, i)
tmp : Int32 = 0
while k > (i + 1)
tmp = (i + k) >> 1
if (array[tmp] % len) > limit
k = tmp
else
i = tmp
end
end
k
end
# Fills the code array with extended parent pointers.
private def setExtendedParentPointers(array : Array(Int32)) : Nil
len : Int32 = array.size
array[0] += array[1]
headNode : Int32 = 0
tailNode : Int32 = 1
topNode : Int32 = 2
tmp : Int32 = 0
while tailNode < (len - 1)
tmp = 0
if topNode >= len || array[headNode] < array[topNode]
tmp = array[headNode]
array[headNode] = tailNode
headNode += 1
else
tmp = array[topNode]
topNode += 1
end
if topNode >= len || (headNode < tailNode && array[headNode] < array[topNode])
tmp += array[headNode]
array[headNode] = tailNode + len
headNode += 1
else
tmp += array[topNode]
topNode += 1
end
array[tailNode] = tmp
tailNode += 1
end
end
# Finds the number of nodes to relocate in order to achieve a given code length limit.
private def findNotesToRelocate(array : Array(Int32), maxLength : Int32) : Int32
curNode : Int32 = array.size - 2
curDepth : Int32 = 1
lastNode : Int32 = 0
while curDepth < (maxLength - 1) && curNode > 1
curNode = first(array, curNode - 1, 0)
curDepth += 1
end
curNode
end
# A final allocation pass with no code length limit.
private def allocateNodeLengths(array : Array(Int32)) : Nil
firstNode : Int32 = array.size - 2
nextNode : Int32 = array.size - 1
currentDepth : Int32 = 1
availableNodes : Int32 = 2
lastNode : Int32 = 0
while availableNodes > 0
lastNode = firstNode
firstNode = first(array, lastNode - 1, 0)
(availableNodes - (lastNode - firstNode)).downto(1) do |_|
array[nextNode] = currentDepth
nextNode -= 1
end
availableNodes = (lastNode - firstNode) << 1
currentDepth += 1
end
end
# A final allocation pass that relocates nodes in order to achieve a maximum
# code length limit.
private def allocateNodeLengthsWithRelocation(array : Array(Int32),
nodesToMove : Int32, insertDepth : Int32) : Nil
firstNode : Int32 = array.size - 2
nextNode : Int32 = array.size - 1
currentDepth : Int32 = (insertDepth == 1 ? 2 : 1)
nodesLeftToMove = (insertDepth == 1 ? nodesToMove - 2 : nodesToMove)
availableNodes : Int32 = currentDepth << 1
lastNode : Int32 = 0
offset : Int32 = 0
while availableNodes > 0
lastNode = firstNode
firstNode = if firstNode <= nodesToMove
firstNode
else
first(array, lastNode - 1, nodesToMove)
end
offset = 0
if currentDepth >= insertDepth
offset = Math.min(nodesLeftToMove, 1 << (currentDepth - insertDepth))
elsif currentDepth == insertDepth - 1
offset = 1
firstNode += 1 if array[firstNode] == lastNode
end
(availableNodes - (lastNode - firstNode + offset)).downto(1) do |_|
array[nextNode] = currentDepth
nextNode -= 1
end
nodesLeftToMove -= offset
availableNodes = (lastNode - firstNode + offset) << 1
currentDepth += 1
end
end
private def significantBits(x : Int32) : Int32
n : Int32 = 0
while x > 0
x >>= 1
n += 1
end
n
end
end
end
|