#### Bzip2 Implementation
#### Copyright (C) 2023-2024 Remilia Scarlet
#### Copyright (C) 2015 Jaime Olivares
#### Copyright (c) 2011 Matthew Francis
#### MIT License
####
#### Ported from the Java implementation by Matthew Francis:
#### https://github.com/MateuszBartosiewicz/bzip2.
####
#### Ported by Remilia Scarlet from the C# implementation by Jamie Olivares:
#### http://github.com/jaime-olivares/bzip2
require "../../bitreader"
module RemiLib::Compression::BZip2
private class HuffmanStageDecoder
@reader : BitReader
# The Huffman table number to use for each group of 50 symbols.
@selectors : Bytes
# The minimum code length for each Huffman table.
@minimumLengths : Array(Int32) = Array(Int32).new(MAXIMUM_TABLES , 0i32)
# An array of values for each Huffman table that must be subtracted from the
# numerical value of a Huffman code of a given bit length to give its
# canonical code index.
@codeBases : Array(Array(Int32))
# An array of values for each Huffman table that gives the highest numerical
# value of a Huffman code of a given bit length.
@codeLimits : Array(Array(Int32))
# A mapping for each Huffman table from canonical code index to output
# symbol.
@codeSymbols : Array(Array(Int32))
# The Huffman table for the current group.
@currentTable : UInt8
# The index of the current group within the selectors array.
@groupIndex : Int32 = -1
# The byte position within the current group. A new group is selected every
# 50 decoded bytes.
@groupPosition : Int32 = -1
# Creates a new `HuffmanStageDecoder` instance.
def initialize(@reader : BitReader, alphabetSize : Int32, tableCodeLengths : Array(Array(UInt8)),
@selectors : Bytes)
@codeBases = Array(Array(Int32)).new(MAXIMUM_TABLES) do |_|
Array(Int32).new(MAX_CODE_LENGTH + 2, 0i32)
end
@codeLimits = Array(Array(Int32)).new(MAXIMUM_TABLES) do |_|
Array(Int32).new(MAX_CODE_LENGTH + 1, 0i32)
end
@codeSymbols = Array(Array(Int32)).new(MAXIMUM_TABLES) do |_|
Array(Int32).new(MAX_ALPHABET_SIZE, 0i32)
end
@currentTable = @selectors[0]
createHuffmanDecodingTable(alphabetSize, tableCodeLengths)
end
# Decodes and returns the next symbol. This will raise a `BZip2::Error` if
# the end of the input stream is reached while decoding.
def nextSymbol : Int32
# Move to next group selector if required.
@groupPosition += 1
if @groupPosition % GROUP_RUN_LENGTH == 0
@groupIndex += 1
raise Error.new("Error decoding BZip2 block") if @groupIndex >= @selectors.size
@currentTable = @selectors.unsafe_fetch(@groupIndex)
end
codeLength : Int32 = @minimumLengths[@currentTable]
tableLimits = @codeLimits[@currentTable]
# Starting with the minimum bit length for the table, read additional bits
# one at a time until a complete code is recognised.
codeBits : UInt32 = @reader.read(codeLength).to_u32!
while codeLength <= MAX_CODE_LENGTH
if codeBits <= tableLimits[codeLength]
# Convert the code to a symbol index and return.
return @codeSymbols[@currentTable][codeBits - @codeBases[@currentTable][codeLength]]
end
codeBits = (codeBits << 1) | @reader.read(1)
codeLength += 1
end
# A valid code was not recognized.
raise Error.new("Error decoding BZip2 block")
end
# Constructs Huffman decoding tables from lists of Canonical Huffman code
# lengths. `alphabetSize` is the total number of codes (uniform for each
# table). `tableCodeLengths` is the Canonical Huffman code lengths for each
# table.
private def createHuffmanDecodingTable(alphabetSize : Int32, tableCodeLengths : Array(Array(UInt8))) : Nil
minLength : Int32 = 0
maxLength : Int32 = 0
code : Int32 = 0
base1 : Int32 = 0
codeIndex : Int32 = 0
tableCodeLengths.size.times do |table|
minLength = Int32::MAX
maxLength = Int32::MIN
tableBases = @codeBases[table]
codeLengths = tableCodeLengths.unsafe_fetch(table)
tableLimits = @codeLimits[table]
tableSymbols = @codeSymbols[table]
# Find the minimum and maximum code length for the table.
alphabetSize.times do |i|
maxLength = Math.max(codeLengths[i].to_i32!, maxLength)
minLength = Math.min(codeLengths[i].to_i32!, minLength)
end
@minimumLengths[table] = minLength
# Calculate the first output symbol for each code length.
alphabetSize.times do |i|
tableBases[codeLengths.unsafe_fetch(i) + 1] += 1
end
(1...(MAX_CODE_LENGTH + 2)).each do |i|
tableBases[i] += tableBases[i - 1]
end
# Calculate the first and last Huffman code for each code length (codes
# at a given length are sequential in value).
code = 0
(minLength..maxLength).each do |i|
base1 = code
code += tableBases[i + 1] - tableBases[i]
tableBases[i] = base1 - tableBases[i]
tableLimits[i] = code - 1
code <<= 1
end
# Populate the mapping from canonical code index to output symbol.
codeIndex = 0
(minLength..maxLength).each do |bitLength|
alphabetSize.times do |symbol|
if codeLengths[symbol] == bitLength
tableSymbols[codeIndex] = symbol
codeIndex += 1
end
end
end
end
end
end
end
|