Login
reader.cr at tip
Login

File src/remilib/compression/bzip/reader.cr from the latest check-in


     1
     2
     3
     4
     5
     6
     7
     8
     9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
   100
   101
   102
   103
   104
   105
   106
   107
   108
   109
   110
   111
   112
   113
   114
   115
   116
   117
   118
   119
   120
   121
   122
   123
   124
   125
   126
   127
   128
   129
   130
   131
   132
   133
   134
   135
   136
   137
   138
   139
   140
   141
   142
   143
   144
   145
   146
   147
   148
   149
   150
   151
   152
   153
   154
   155
   156
   157
   158
   159
   160
   161
   162
   163
   164
   165
   166
   167
   168
   169
   170
   171
   172
   173
   174
   175
   176
   177
   178
   179
   180
   181
#### Bzip2 Implementation
#### Copyright (C) 2023-2024 Remilia Scarlet
#### Copyright (C) 2015 Jaime Olivares
#### Copyright (c) 2011 Matthew Francis
#### MIT License
####
#### Ported from the Java implementation by Matthew Francis:
#### https://github.com/MateuszBartosiewicz/bzip2.
####
#### Ported by Remilia Scarlet from the C# implementation by Jamie Olivares:
#### http://github.com/jaime-olivares/bzip2
require "./blockdecompressor"

module RemiLib::Compression::BZip2
  # A read-only IO object to decompress data in the bzip2 format.
  #
  # Instances of this class wrap another IO object. When you read from this
  # instance, it reads data from the underlying IO, decompresses it,
  # and returns it to the caller.
  #
  # Example of a simple decompressor:
  #
  # ```crystal
  # # Decompress from `file` and save to `output`.
  # File.open(bzip2file, "rb") do |file|
  #   File.open(outputfile, "wb") do |output|
  #     RemiLib::Compression::BZip2::Reader.open(file) do |bzio|
  #       IO.copy(bzio, output)
  #     end
  #   end
  # end
  # ```
  class Reader < IO
    include IO::Buffered

    # If `#sync_close?` is `true`, closing this IO will close the underlying IO.
    property? sync_close : Bool

    # Returns `true` if this reader is closed.
    getter? closed = false

    # Peeked bytes from the underlying IO
    @peek : Bytes?

    # Decompressor for the current block, or `nil` while no block has been
    # started yet (in which case the stream header has not been read either).
    @stream : BlockDecompressor?

    # Block size passed to each `BlockDecompressor`, derived from the level
    # digit in the stream header (level * 100000).
    @streamBlockSize : Int32 = 0

    # Running combination of the CRCs of all blocks completed so far.  It is
    # compared against the CRC stored after the end-of-stream marker.
    @streamCRC : UInt32 = 0

    # Creates a new `BZip2::Reader` that decompresses data read from *io*.
    #
    # If *sync_close* is `true`, closing this reader also closes *io*.
    #
    # `#unbuffered_rewind` calls this method again on an already constructed
    # reader, so every piece of per-stream state is reset here explicitly
    # instead of relying on the defaults given at the declarations above.
    def initialize(@io : IO, @sync_close : Bool = false)
      @bitStream = BitReader.new(@io)
      @peek = nil
      @atEnd = false

      # Drop any block decompressor / CRC left over from a previous pass so
      # that the next read starts again from the stream header.
      @stream = nil
      @streamBlockSize = 0
      @streamCRC = 0_u32
    end

    # Creates a new reader from the given *io*, yields it to the given block,
    # and closes it at its end.
    def self.open(io : IO, sync_close : Bool = false, &)
      reader = new(io, sync_close: sync_close)
      yield reader ensure reader.close
    end

    # Always raises `IO::Error` because this is a read-only `IO`.
    def unbuffered_write(slice : Bytes) : NoReturn
      raise IO::Error.new("Can't write to RemiLib::Compression::BZip2::Reader")
    end

    # See `IO#read`.
    #
    # Fills *slice* with decompressed data and returns the number of bytes
    # written to it.  Returns 0 when *slice* is empty or once the end of the
    # stream has been reached.  Raises if the reader is closed, and propagates
    # any error raised while parsing the stream header or a block.
    def unbuffered_read(slice : Bytes) : Int32
      check_open

      return 0 if slice.empty?
      return 0 if @atEnd

      bytesRead : Int32 = 0
      if strm = @stream
        bytesRead = strm.read(slice)
      else
        # No block has been started yet, so the stream header is still unread.
        initStream
      end

      return bytesRead unless bytesRead <= 0

      # Either nothing has been decoded yet or the current block produced no
      # data: move on to the next block, if there is one.
      if initNextBlock
        @stream.try { |st| bytesRead = st.read(slice) }
      end

      # bytesRead could be -1
      Math.max(bytesRead, 0)
    end

    # Always raises `IO::Error` because this is a read-only `IO`.
    def unbuffered_flush : NoReturn
      raise IO::Error.new "Can't flush RemiLib::Compression::BZip2::Reader"
    end

    # Closes this reader.  The underlying IO is closed as well when
    # `#sync_close?` is `true`.  Closing an already closed reader is a no-op.
    def unbuffered_close : Nil
      return if @closed
      @closed = true
      @io.close if @sync_close
    end

    # Rewinds the underlying IO and resets this reader so that the next read
    # starts over from the stream header.
    def unbuffered_rewind : Nil
      check_open
      @io.rewind
      # Re-running the constructor resets the bit reader and all decoding state.
      initialize(@io, @sync_close)
    end

    # Writes the same representation as `#to_s` to *io*.
    def inspect(io : IO) : Nil
      to_s(io)
    end

    # Reads the stream header and checks that the data appears to be a valid
    # BZip2 stream.  This will raise a `BZip2::Error` if the header is invalid.
    private def initStream : Nil
      # Read stream header
      marker1 = @bitStream.read(16).to_u32!
      marker2 = @bitStream.read(8).to_u32!
      blockSize = @bitStream.read(8).to_i32! - 48 # 48 == '0'.ord

      if marker1 != STREAM_START_MARKER_1 || marker2 != STREAM_START_MARKER_2 ||
         blockSize < MIN_COMPRESSION_LEVEL || blockSize > MAX_COMPRESSION_LEVEL
        raise Error.new("Invalid BZip2 header")
      end

      @streamBlockSize = blockSize * 100000
    end

    # Prepares a new block for decompression if any remain in the stream.
    #
    # If a previous block has completed, its CRC is checked and merged into the
    # stream CRC.  If the previous block was the final block in the stream, the
    # stream CRC is validated.
    #
    # Returns `true` if a block was successfully initialized, or `false` if the
    # end of file marker was encountered.
    #
    # Raises `BZip2::Error` if the stream CRC does not match or if neither a
    # block header nor an end-of-stream marker is found.  After any failure in
    # this method the reader is marked as being at its end.
    private def initNextBlock : Bool
      return false if @atEnd

      # If a block is complete, check the block CRC and integrate it into the
      # stream CRC (rotate the running value left by one bit, then XOR).
      @stream.try do |strm|
        blockCRC = strm.checkCrc
        @streamCRC = ((@streamCRC << 1) | (@streamCRC >> 31)) ^ blockCRC
      end

      # Read block-header or end-of-stream marker.
      marker1 = @bitStream.read(24)
      marker2 = @bitStream.read(24)

      if marker1 == BLOCK_HEADER_MARKER_1 && marker2 == BLOCK_HEADER_MARKER_2
        # Initialize a new block.
        begin
          @stream = BlockDecompressor.new(@bitStream, @streamBlockSize)
        rescue err : Exception
          # A block that cannot be set up leaves the stream unusable.
          @atEnd = true
          raise err
        end

        return true
      end

      # Read and verify the end-of-stream CRC.
      if marker1 == STREAM_END_MARKER_1 && marker2 == STREAM_END_MARKER_2
        @atEnd = true
        storedCombinedCRC = @bitStream.read(32).to_u32!
        raise Error.new("BZip2 stream CRC error") if storedCombinedCRC != @streamCRC
        return false
      end

      # If what was read is not a valid block-header or end-of-stream marker,
      # the stream is broken.
      @atEnd = true
      raise Error.new("BZip2 stream format error")
    end
  end
end