Login
Artifact [40e4eb9fba]
Login

Artifact 40e4eb9fbac5e81adc86f6bae37dcc83ddad38ba10b21bb726f7b22ba1817f93:


#### Bzip2 Implementation
#### Copyright (C) 2023-2024 Remilia Scarlet
#### Copyright (C) 2015 Jaime Olivares
#### Copyright (c) 2011 Matthew Francis
#### MIT License
####
#### Ported from the Java implementation by Matthew Francis:
#### https://github.com/MateuszBartosiewicz/bzip2.
####
#### Ported by Remilia Scarlet from the C# implementation by Jaime Olivares:
#### http://github.com/jaime-olivares/bzip2
require "./blockdecompressor"

module RemiLib::Compression::BZip2
  # A read-only IO object to decompress data in the bzip2 format.
  #
  # Instances of this class wrap another IO object. When you read from this
  # instance, it reads data from the underlying IO, decompresses it,
  # and returns it to the caller.
  #
  # Example of a simple decompressor:
  #
  # ```crystal
  # # Decompress from `file` and save to `output`.
  # File.open(bzip2file, "rb") do |file|
  #   File.open(outputfile, "wb") do |output|
  #     RemiLib::Compression::BZip2::Reader.open(file) do |bzio|
  #       IO.copy(bzio, output)
  #     end
  #   end
  # end
  # ```
  class Reader < IO
    include IO::Buffered

    # If `#sync_close?` is `true`, closing this IO will close the underlying IO.
    property? sync_close : Bool

    # Returns `true` if this reader is closed.
    getter? closed = false

    # Peeked bytes from the underlying IO
    # NOTE(review): only ever assigned `nil` in this class as far as is visible
    # here; it may be unused -- confirm before relying on it.
    @peek : Bytes?

    # Decompressor for the current block.  `nil` until the first block header
    # has been read.
    @stream : BlockDecompressor?

    # Block size in bytes for the current stream: the level digit read from the
    # stream header multiplied by 100000.  Handed to every new
    # `BlockDecompressor`.
    @streamBlockSize : Int32 = 0

    # Running combined CRC of all completed blocks.  It is compared against the
    # CRC stored after the end-of-stream marker.
    @streamCRC : UInt32 = 0

    # Creates a new `BZip2::Reader` that decompresses the data read from *io*.
    def initialize(@io : IO, @sync_close : Bool = false)
      # Nothing has been consumed yet, so the end-of-stream marker cannot have
      # been seen.
      @atEnd = false
      @peek = nil

      # All compressed input is consumed through this bit reader.
      @bitStream = BitReader.new(@io)
    end

    # Creates a new reader from the given *io*, yields it to the given block,
    # and closes it at its end.
    def self.open(io : IO, sync_close : Bool = false, &)
      reader = new(io, sync_close: sync_close)
      begin
        # The value of the caller's block becomes the return value of `open`.
        yield reader
      ensure
        # Runs whether the block finished normally or raised.
        reader.close
      end
    end

    # Always raises `IO::Error` because this is a read-only `IO`.
    def unbuffered_write(slice : Bytes) : NoReturn
      # Writing is never supported; *slice* is ignored.
      raise IO::Error.new "Can't write to RemiLib::Compression::BZip2::Reader"
    end

    # See `IO#read`.
    def unbuffered_read(slice : Bytes) : Int32
      check_open

      # Nothing to deliver for an empty destination or a finished stream.
      return 0 if slice.empty? || @atEnd

      count : Int32 = 0
      if current = @stream
        # A block is already active: try to satisfy the read from it first.
        count = current.read(slice)
        return count if count > 0
      else
        # First read on this stream: the stream header has to be parsed before
        # any block can be opened.
        initStream
      end

      # Either no block was active or the active one produced nothing, so move
      # on to the next block (if there is one) and read from that instead.
      if initNextBlock
        if fresh = @stream
          count = fresh.read(slice)
        end
      end

      # The block decompressor may report -1; callers of this method expect a
      # non-negative byte count.
      count < 0 ? 0 : count
    end

    def unbuffered_flush : NoReturn
      # Always raises: this IO is read-only, so there is never anything to
      # flush.
      raise IO::Error.new("Can't flush RemiLib::Compression::BZip2::Reader")
    end

    # Closes this reader.
    def unbuffered_close : Nil
      # Closing an already closed reader is a no-op.
      unless @closed
        @closed = true
        # The wrapped IO is only closed when the caller asked for it.
        @io.close if @sync_close
      end
    end

    # Rewinds the underlying IO and resets all decompression state so that the
    # next read starts again from the stream header.
    #
    # Raises `IO::Error` if this reader is closed.
    def unbuffered_rewind : Nil
      check_open
      @io.rewind

      # Rebuilds the bit reader and clears the end-of-stream flag.
      initialize(@io, @sync_close)

      # `#initialize` does not assign the per-stream decoding state, so it has
      # to be dropped explicitly.  Otherwise the next read would keep using the
      # previous block decompressor, skip the stream header, and fold new block
      # CRCs into a stale stream CRC.
      @stream = nil
      @streamBlockSize = 0
      @streamCRC = 0_u32
    end

    def inspect(io : IO) : Nil
      # The inspect output is deliberately the same as the `to_s` output.
      self.to_s(io)
    end

    # Reads the stream header and checks that the data appears to be a valid
    # BZip2 stream.  This will raise a `BZip2::Error` if the header is invalid.
    private def initStream : Nil
      # The stream header is 32 bits: a 16-bit marker, an 8-bit marker, and an
      # 8-bit ASCII digit giving the compression level.
      marker1 = @bitStream.read(16).to_u32!
      marker2 = @bitStream.read(8).to_u32!
      blockSize = @bitStream.read(8).to_i32! - 48 # 48 == '0'.ord

      if marker1 != STREAM_START_MARKER_1 || marker2 != STREAM_START_MARKER_2 ||
         blockSize < MIN_COMPRESSION_LEVEL || blockSize > MAX_COMPRESSION_LEVEL
        # Mark the stream as finished before raising, as every other format
        # error in this class does, so that a later read does not try to parse
        # whatever bytes follow as another header.
        @atEnd = true
        raise Error.new("Invalid BZip2 header")
      end

      # The level digit counts units of 100000 bytes.
      @streamBlockSize = blockSize * 100000
    end

    # Prepares a new block for decompression if any remain in the stream.
    #
    # If a previous block has completed, its CRC is checked and merged into the
    # stream CRC.  If the previous block was the final block in the stream, the
    # stream CRC is validated.
    #
    # Returns `true` if a block was successfully initialized, or `false` if the
    # end of file marker was encountered.
    private def initNextBlock : Bool
      return false if @atEnd

      # A previous block has just finished: verify its CRC and fold it into the
      # running stream CRC (rotate left by one bit, then XOR).
      if finished = @stream
        finishedCRC = finished.checkCrc
        @streamCRC = ((@streamCRC << 1) | (@streamCRC >> 31)) ^ finishedCRC
      end

      # The next 48 bits are either a block header or the end-of-stream marker.
      head = @bitStream.read(24)
      tail = @bitStream.read(24)

      if head == BLOCK_HEADER_MARKER_1 && tail == BLOCK_HEADER_MARKER_2
        # Another block follows.  If it cannot be set up, the stream is
        # unusable from here on, so flag it as finished before re-raising.
        begin
          @stream = BlockDecompressor.new(@bitStream, @streamBlockSize)
        rescue err : Exception
          @atEnd = true
          raise err
        end

        true
      elsif head == STREAM_END_MARKER_1 && tail == STREAM_END_MARKER_2
        # End of stream: the stored combined CRC must match the one accumulated
        # from the individual blocks.
        @atEnd = true
        expectedCRC = @bitStream.read(32).to_u32!
        raise Error.new("BZip2 stream CRC error") if expectedCRC != @streamCRC

        false
      else
        # Neither a block header nor an end-of-stream marker: the stream is
        # broken.
        @atEnd = true
        raise Error.new("BZip2 stream format error")
      end
    end
  end
end