#### Bzip2 Implementation
#### Copyright (C) 2023-2024 Remilia Scarlet
#### Copyright (C) 2015 Jaime Olivares
#### Copyright (c) 2011 Matthew Francis
#### MIT License
####
#### Ported from the Java implementation by Matthew Francis:
#### https://github.com/MateuszBartosiewicz/bzip2.
####
#### Ported by Remilia Scarlet from the C# implementation by Jamie Olivares:
#### http://github.com/jaime-olivares/bzip2
module RemiLib::Compression::BZip2
# An encoder for the BZip2 Move To Front Transform and Run-Length Encoding[2]
# stages.
#
# Although conceptually these two stages are separate, it is computationally
# efficient to perform them in one pass.
private class MtfAndRle2StageEncoder
# The Burrows-Wheeler transformed block.
@bwtBlock : Array(Int32)
# Actual length of the data in the bwtBlock array.
@bwtLength : Int32
# At each position, true if the byte value with that index is present within
# the block, otherwise false.
@bwtValuesInUse : Array(Bool)
# The output of the Move To Front Transform and Run-Length Encoding[2]
# stages.
getter mtfBlock : Array(UInt16)
# The global frequencies of values within the mtfBlock array.
getter mtfSymbolFrequencies : Array(Int32) = Array(Int32).new(MAX_ALPHABET_SIZE, 0i32)
# Gets The actual length of the MTF block.
getter mtfLength : Int32 = 0
# Gets the size of the MTF block's alphabet.
getter mtfAlphabetSize : Int32 = 0
# Creates a new `MtfAndRle2StageEncoder` instance.
#
# * `bwtBlock` is the Burrows-Wheeler Transformed block data.
# * `bwtLength` is the actual length of the BWT data.
# * `bwtValuesInUse` is the values that are present within the BWT data. For
# each index, `true` if that value is present within the data, otherwise
# `false`.
def initialize(@bwtBlock : Array(Int32), @bwtLength : Int32, @bwtValuesInUse : Array(Bool))
@mtfBlock = Array(UInt16).new(@bwtLength + 1, 0u16)
end
# Performs the Move To Front transform and Run Length Encoding[1] stages.
def encode : Nil
huffmanSymbolMap = StaticArray(UInt8, 256).new(0u8)
symbolMTF : MoveToFront = MoveToFront.new
totalUniqueValues : Int32 = 0
256.times do |i|
if @bwtValuesInUse[i]
huffmanSymbolMap[i] = totalUniqueValues.to_u8!
totalUniqueValues += 1
end
end
endOfBlockSymbol : Int32 = totalUniqueValues + 1
mtfIndex : Int32 = 0
repeatCount : Int32 = 0
totalRunAs : Int32 = 0
totalRunBs : Int32 = 0
mtfPosition : Int32 = 0
@bwtLength.times do |i|
# Move-To-Front
mtfPosition = symbolMTF.valueToFront(huffmanSymbolMap[@bwtBlock.unsafe_fetch(i) & 0xFF])
# Run-length Encoding
if mtfPosition == 0
repeatCount += 1
else
if repeatCount > 0
repeatCount -= 1
loop do
if (repeatCount & 1) == 0
@mtfBlock.unsafe_put(mtfIndex, RLE_SYMBOL_RUNA.to_u16!)
mtfIndex += 1
totalRunAs += 1
else
@mtfBlock.unsafe_put(mtfIndex, RLE_SYMBOL_RUNB.to_u16!)
mtfIndex += 1
totalRunBs += 1
end
break if repeatCount <= 1
repeatCount = (repeatCount - 2) >> 1
end
repeatCount = 0
end
@mtfBlock.unsafe_put(mtfIndex, (mtfPosition + 1).to_u16!)
mtfIndex += 1
@mtfSymbolFrequencies.unsafe_put(mtfPosition + 1,
@mtfSymbolFrequencies.unsafe_fetch(mtfPosition + 1) + 1)
end
end
if repeatCount > 0
repeatCount -= 1
loop do
if (repeatCount & 1) == 0
@mtfBlock.unsafe_put(mtfIndex, RLE_SYMBOL_RUNA.to_u16!)
mtfIndex += 1
totalRunAs += 1
else
@mtfBlock.unsafe_put(mtfIndex, RLE_SYMBOL_RUNB.to_u16!)
mtfIndex += 1
totalRunBs += 1
end
break if repeatCount <= 1
repeatCount = (repeatCount - 2) >> 1
end
end
@mtfBlock.unsafe_put(mtfIndex, endOfBlockSymbol.to_u16!)
@mtfSymbolFrequencies.unsafe_put(endOfBlockSymbol,
@mtfSymbolFrequencies.unsafe_fetch(endOfBlockSymbol) + 1)
@mtfSymbolFrequencies.unsafe_put(RLE_SYMBOL_RUNA,
@mtfSymbolFrequencies.unsafe_fetch(RLE_SYMBOL_RUNA) + totalRunAs)
@mtfSymbolFrequencies.unsafe_put(RLE_SYMBOL_RUNB,
@mtfSymbolFrequencies.unsafe_fetch(RLE_SYMBOL_RUNB) + totalRunBs)
@mtfLength = mtfIndex + 1
@mtfAlphabetSize = endOfBlockSymbol + 1
end
end
end
|