Login
mtfandrle2stageencoder.cr at tip
Login

File src/remilib/compression/bzip/mtfandrle2stageencoder.cr from the latest check-in


     1
     2
     3
     4
     5
     6
     7
     8
     9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
   100
   101
   102
   103
   104
   105
   106
   107
   108
   109
   110
   111
   112
   113
   114
   115
   116
   117
   118
   119
   120
   121
   122
   123
   124
   125
   126
   127
   128
   129
   130
   131
   132
   133
   134
   135
   136
   137
   138
   139
   140
#### Bzip2 Implementation
#### Copyright (C) 2023-2024 Remilia Scarlet
#### Copyright (C) 2015 Jaime Olivares
#### Copyright (c) 2011 Matthew Francis
#### MIT License
####
#### Ported from the Java implementation by Matthew Francis:
#### https://github.com/MateuszBartosiewicz/bzip2.
####
#### Ported by Remilia Scarlet from the C# implementation by Jamie Olivares:
#### http://github.com/jaime-olivares/bzip2

module RemiLib::Compression::BZip2
  # An encoder for the BZip2 Move To Front Transform and Run-Length Encoding[2]
  # stages.
  #
  # Although conceptually these two stages are separate, it is computationally
  # efficient to perform them in one pass.
  private class MtfAndRle2StageEncoder
    # The Burrows-Wheeler transformed block.
    @bwtBlock : Array(Int32)

    # Actual length of the data in the bwtBlock array.
    @bwtLength : Int32

    # At each position, true if the byte value with that index is present within
    # the block, otherwise false.
    @bwtValuesInUse : Array(Bool)

    # The output of the Move To Front Transform and Run-Length Encoding[2]
    # stages.
    getter mtfBlock : Array(UInt16)

    # The global frequencies of values within the mtfBlock array.
    getter mtfSymbolFrequencies : Array(Int32) = Array(Int32).new(MAX_ALPHABET_SIZE, 0i32)

    # Gets The actual length of the MTF block.
    getter mtfLength : Int32 = 0

    # Gets the size of the MTF block's alphabet.
    getter mtfAlphabetSize : Int32 = 0

    # Creates a new `MtfAndRle2StageEncoder` instance.
    #
    # * `bwtBlock` is the Burrows-Wheeler Transformed block data.
    # * `bwtLength` is the actual length of the BWT data.
    # * `bwtValuesInUse` is the values that are present within the BWT data. For
    #   each index, `true` if that value is present within the data, otherwise
    #   `false`.
    def initialize(@bwtBlock : Array(Int32), @bwtLength : Int32, @bwtValuesInUse : Array(Bool))
      @mtfBlock = Array(UInt16).new(@bwtLength + 1, 0u16)
    end

    # Performs the Move To Front transform and Run Length Encoding[1] stages.
    def encode : Nil
      huffmanSymbolMap = StaticArray(UInt8, 256).new(0u8)
      symbolMTF : MoveToFront = MoveToFront.new

      totalUniqueValues : Int32 = 0
      256.times do |i|
        if @bwtValuesInUse[i]
          huffmanSymbolMap[i] = totalUniqueValues.to_u8!
          totalUniqueValues += 1
        end
      end

      endOfBlockSymbol : Int32 = totalUniqueValues + 1
      mtfIndex : Int32 = 0
      repeatCount : Int32 = 0
      totalRunAs : Int32 = 0
      totalRunBs : Int32 = 0
      mtfPosition : Int32 = 0

      @bwtLength.times do |i|
        # Move-To-Front
        mtfPosition = symbolMTF.valueToFront(huffmanSymbolMap[@bwtBlock.unsafe_fetch(i) & 0xFF])

        # Run-length Encoding
        if mtfPosition == 0
          repeatCount += 1
        else
          if repeatCount > 0
            repeatCount -= 1

            loop do
              if (repeatCount & 1) == 0
                @mtfBlock.unsafe_put(mtfIndex, RLE_SYMBOL_RUNA.to_u16!)
                mtfIndex += 1
                totalRunAs += 1
              else
                @mtfBlock.unsafe_put(mtfIndex, RLE_SYMBOL_RUNB.to_u16!)
                mtfIndex += 1
                totalRunBs += 1
              end

              break if repeatCount <= 1
              repeatCount = (repeatCount - 2) >> 1
            end

            repeatCount = 0
          end
          @mtfBlock.unsafe_put(mtfIndex, (mtfPosition + 1).to_u16!)
          mtfIndex += 1
          @mtfSymbolFrequencies.unsafe_put(mtfPosition + 1,
                                           @mtfSymbolFrequencies.unsafe_fetch(mtfPosition + 1) + 1)
        end
      end

      if repeatCount > 0
        repeatCount -= 1
        loop do
          if (repeatCount & 1) == 0
            @mtfBlock.unsafe_put(mtfIndex, RLE_SYMBOL_RUNA.to_u16!)
            mtfIndex += 1
            totalRunAs += 1
          else
            @mtfBlock.unsafe_put(mtfIndex, RLE_SYMBOL_RUNB.to_u16!)
            mtfIndex += 1
            totalRunBs += 1
          end

          break if repeatCount <= 1
          repeatCount = (repeatCount - 2) >> 1
        end
      end

      @mtfBlock.unsafe_put(mtfIndex, endOfBlockSymbol.to_u16!)

      @mtfSymbolFrequencies.unsafe_put(endOfBlockSymbol,
                                       @mtfSymbolFrequencies.unsafe_fetch(endOfBlockSymbol) + 1)
      @mtfSymbolFrequencies.unsafe_put(RLE_SYMBOL_RUNA,
                                       @mtfSymbolFrequencies.unsafe_fetch(RLE_SYMBOL_RUNA) + totalRunAs)
      @mtfSymbolFrequencies.unsafe_put(RLE_SYMBOL_RUNB,
                                       @mtfSymbolFrequencies.unsafe_fetch(RLE_SYMBOL_RUNB) + totalRunBs)

      @mtfLength = mtfIndex + 1
      @mtfAlphabetSize = endOfBlockSymbol + 1
    end
  end
end