#### Bzip2 Implementation
#### Copyright (C) 2023-2024 Remilia Scarlet
#### Copyright (C) 2015 Jaime Olivares
#### Copyright (c) 2011 Matthew Francis
#### MIT License
####
#### Ported from the Java implementation by Matthew Francis:
#### https://github.com/MateuszBartosiewicz/bzip2.
####
#### Ported by Remilia Scarlet from the C# implementation by Jaime Olivares:
#### http://github.com/jaime-olivares/bzip2
require "./blockdecompressor"
module RemiLib::Compression::BZip2
# A read-only IO object to decompress data in the bzip2 format.
#
# Instances of this class wrap another IO object. When you read from this
# instance, it reads data from the underlying IO, decompresses it,
# and returns it to the caller.
#
# Example of a simple decompressor:
#
# ```crystal
# # Decompress from `file` and save to `output`.
# File.open(bzip2file, "rb") do |file|
# File.open(outputfile, "wb") do |output|
# RemiLib::Compression::BZip2::Reader.open(file) do |bzio|
# IO.copy(bzio, output)
# end
# end
# end
#```
class Reader < IO
# Buffering is provided by `IO::Buffered`; this class supplies the
# `unbuffered_*` primitives defined below.
include IO::Buffered
# If `#sync_close?` is `true`, closing this IO will close the underlying IO.
property? sync_close : Bool
# Returns `true` if this reader is closed.
getter? closed = false
# Peeked bytes from the underlying IO
# NOTE(review): only ever assigned `nil` in the code visible here; possibly
# vestigial, confirm before relying on it.
@peek : Bytes?
# Decompressor for the current block. Stays `nil` until the first block
# header has been read.
@stream : BlockDecompressor?
# Block size taken from the stream header: the header's level digit
# multiplied by 100,000. Passed to every new `BlockDecompressor`.
@streamBlockSize : Int32 = 0
# Running combined CRC of the blocks decoded so far. It is compared with the
# CRC stored after the end-of-stream marker.
@streamCRC : UInt32 = 0
# Creates a new `BZip2::Reader` that decompresses the data read from *io*.
#
# When *sync_close* is `true`, closing this reader also closes *io*.
def initialize(@io : IO, @sync_close : Bool = false)
  @atEnd = false
  @peek = nil
  @bitStream = BitReader.new(@io)
end
# Builds a reader around *io*, yields it to the given block, and closes the
# reader once the block finishes, whether it returns normally or raises.
#
# *sync_close* is forwarded to the constructor.
def self.open(io : IO, sync_close : Bool = false, &)
  reader = new(io, sync_close: sync_close)
  begin
    yield reader
  ensure
    reader.close
  end
end
# Writing is not supported: this `IO` is read-only, so every call raises
# `IO::Error`.
def unbuffered_write(slice : Bytes) : NoReturn
  raise IO::Error.new "Can't write to RemiLib::Compression::BZip2::Reader"
end
# Fills *slice* with decompressed data and returns the number of bytes
# produced, as reported by the current block decompressor.
#
# Returns 0 when *slice* is empty or the end of the stream has been reached.
# On the first call the stream header is read; when the current block yields
# no data, the next block (if any) is opened and read from instead.
#
# See `IO#read`.
def unbuffered_read(slice : Bytes) : Int32
  check_open
  return 0 if slice.empty? || @atEnd

  count : Int32 = 0
  if current = @stream
    count = current.read(slice)
    return count if count > 0
  else
    # No block has been opened yet, so the stream header is still unread.
    initStream
  end

  # Either nothing has been decoded yet or the current block is exhausted.
  if initNextBlock
    if fresh = @stream
      count = fresh.read(slice)
    end
  end

  # The count may be negative here; callers must only ever see 0 or more.
  count < 0 ? 0 : count
end
# Flushing is not supported on this read-only `IO`; always raises `IO::Error`.
def unbuffered_flush : NoReturn
  raise IO::Error.new("Can't flush RemiLib::Compression::BZip2::Reader")
end
# Marks this reader as closed. The wrapped IO is closed as well when
# `#sync_close?` is `true`. Closing an already closed reader does nothing.
def unbuffered_close : Nil
  unless @closed
    @closed = true
    @io.close if @sync_close
  end
end
# Rewinds the underlying IO and resets all decompression state so that the
# next read starts again from the stream header.
#
# `check_open` rejects the call if this reader has already been closed.
def unbuffered_rewind : Nil
  check_open
  @io.rewind
  # `initialize` only rebuilds the bit reader and clears `@peek` and the
  # end-of-stream flag. The per-stream state has to be cleared here as well.
  # Otherwise the next read would keep using the previous block decompressor,
  # skip the stream header, and fold stale values into the stream CRC.
  @stream = nil
  @streamBlockSize = 0
  @streamCRC = 0_u32
  initialize(@io, @sync_close)
end
# Writes the same representation as `#to_s` to *io*.
def inspect(io : IO) : Nil
  to_s io
end
# Consumes the 32-bit stream header and validates it: a 16-bit marker, an
# 8-bit marker, and an 8-bit ASCII digit giving the compression level.
#
# On success the stream block size is set to the level multiplied by 100,000.
# Raises a `BZip2::Error` if either marker is wrong or the level is outside
# `MIN_COMPRESSION_LEVEL..MAX_COMPRESSION_LEVEL`.
private def initStream : Nil
  leadMarker = @bitStream.read(16).to_u32!
  tailMarker = @bitStream.read(8).to_u32!
  # The level is stored as an ASCII digit; convert it to its numeric value.
  level = @bitStream.read(8).to_i32! - '0'.ord

  markersOk = leadMarker == STREAM_START_MARKER_1 && tailMarker == STREAM_START_MARKER_2
  levelOk = level >= MIN_COMPRESSION_LEVEL && level <= MAX_COMPRESSION_LEVEL
  raise Error.new("Invalid BZip2 header") unless markersOk && levelOk

  @streamBlockSize = level * 100_000
end
# Advances to the next block of the stream, if there is one.
#
# When a block was already being decoded, its CRC is checked first and folded
# into the running stream CRC. Then the next 48 bits are read and interpreted:
#
# * block-header marker: a new block decompressor is created, returns `true`
# * end-of-stream marker: the stored stream CRC is read and compared with the
#   running one, returns `false`
# * anything else: the stream is treated as corrupt
#
# Raises a `BZip2::Error` on a stream CRC mismatch or an unrecognized marker.
# In every failing or terminating case the reader is flagged as being at its
# end, so later calls return `false` immediately.
private def initNextBlock : Bool
  return false if @atEnd

  # Finish the previous block: rotate the stream CRC left by one bit and XOR
  # the block's CRC into it.
  if finished = @stream
    crc = finished.checkCrc
    rotated = (@streamCRC << 1) | (@streamCRC >> 31)
    @streamCRC = rotated ^ crc
  end

  # The marker is 48 bits wide and is read as two 24-bit halves.
  high = @bitStream.read(24)
  low = @bitStream.read(24)

  if high == BLOCK_HEADER_MARKER_1 && low == BLOCK_HEADER_MARKER_2
    begin
      @stream = BlockDecompressor.new(@bitStream, @streamBlockSize)
    rescue ex : Exception
      # A block that cannot be set up leaves the stream unusable.
      @atEnd = true
      raise ex
    end
    true
  elsif high == STREAM_END_MARKER_1 && low == STREAM_END_MARKER_2
    @atEnd = true
    expected = @bitStream.read(32).to_u32!
    raise Error.new("BZip2 stream CRC error") unless expected == @streamCRC
    false
  else
    # Neither a block header nor the end-of-stream marker: the data is broken.
    @atEnd = true
    raise Error.new("BZip2 stream format error")
  end
end
end
end
|