Login
huffmanallocator.cr at tip
Login

File src/remilib/compression/bzip/huffmanallocator.cr from the latest check-in


     1
     2
     3
     4
     5
     6
     7
     8
     9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
   100
   101
   102
   103
   104
   105
   106
   107
   108
   109
   110
   111
   112
   113
   114
   115
   116
   117
   118
   119
   120
   121
   122
   123
   124
   125
   126
   127
   128
   129
   130
   131
   132
   133
   134
   135
   136
   137
   138
   139
   140
   141
   142
   143
   144
   145
   146
   147
   148
   149
   150
   151
   152
   153
   154
   155
   156
   157
   158
   159
   160
   161
   162
   163
   164
   165
   166
   167
   168
   169
   170
   171
   172
   173
   174
   175
   176
   177
   178
   179
   180
   181
   182
   183
   184
   185
   186
   187
   188
   189
   190
   191
   192
   193
   194
   195
   196
#### Bzip2 Implementation
#### Copyright (C) 2023-2024 Remilia Scarlet
#### Copyright (C) 2015 Jaime Olivares
#### Copyright (c) 2011 Matthew Francis
#### MIT License
####
#### Ported from the Java implementation by Matthew Francis:
#### https://github.com/MateuszBartosiewicz/bzip2.
####
#### Ported by Remilia Scarlet from the C# implementation by Jamie Olivares:
#### http://github.com/jaime-olivares/bzip2

module RemiLib::Compression::BZip2
  # An in-place, length restricted Canonical Huffman code length allocator.
  #
  # Based on the algorithm proposed by R.L.Milidiú, A.A.Pessoa and E.S.Laber in
  # ["In-place Length-Restricted Prefix
  # Coding"](http://www-di.inf.puc-rio.br/~laber/public/spire98.ps) and
  # incorporating additional ideas from the implementation of
  # ["shcodec"](http://webcenter.ru/~xander/) by Simakov Alexander.
  private module HuffmanAllocator
    extend self

    # Allocates Canonical Huffman code lengths in place based on a sorted
    # frequency array.
    #
    # `arr` should be a sorted array of symbol frequencies.  These will be
    # modified in-place into an array of canonical Huffman code lengths.
    #
    # `maxLength` is the maximum code length.  This must be at least
    # `Math.ceil(Math.log2(arr.size))`.
    def allocateHuffmanCodeLengths(arr : Array(Int32), maxLength : Int) : Nil
      case arr.size
      when 2
        arr[1] = 1
      when 1
        arr[0] = 1
        return
      end

      setExtendedParentPointers(arr)

      nodesToRelocate : Int32 = findNotesToRelocate(arr, maxLength)

      if (arr[0] % arr.size) >= nodesToRelocate
        allocateNodeLengths(arr)
      else
        insertDepth : Int32 = maxLength - significantBits(nodesToRelocate - 1)
        allocateNodeLengthsWithRelocation(arr, nodesToRelocate, insertDepth)
      end
    end

    private def first(array : Array(Int32), i : Int32, nodesToMove : Int32) : Int32
      len : Int32 = array.size
      limit : Int32 = i
      k : Int32 = array.size - 2

      while (i >= nodesToMove) && (array[i] % len) > limit
        k = i
        i -= (limit - i + 1)
      end
      i = Math.max(nodesToMove - 1, i)

      tmp : Int32 = 0
      while k > (i + 1)
        tmp = (i + k) >> 1
        if (array[tmp] % len) > limit
          k = tmp
        else
          i = tmp
        end
      end

      k
    end

    # Fills the code array with extended parent pointers.
    private def setExtendedParentPointers(array : Array(Int32)) : Nil
      len : Int32 = array.size
      array[0] += array[1]

      headNode : Int32 = 0
      tailNode : Int32 = 1
      topNode : Int32 = 2
      tmp : Int32 = 0

      while tailNode < (len - 1)
        tmp = 0
        if topNode >= len || array[headNode] < array[topNode]
          tmp = array[headNode]
          array[headNode] = tailNode
          headNode += 1
        else
          tmp = array[topNode]
          topNode += 1
        end

        if topNode >= len || (headNode < tailNode && array[headNode] < array[topNode])
          tmp += array[headNode]
          array[headNode] = tailNode + len
          headNode += 1
        else
          tmp += array[topNode]
          topNode += 1
        end

        array[tailNode] = tmp
        tailNode += 1
      end
    end

    # Finds the number of nodes to relocate in order to achieve a given code length limit.
    private def findNotesToRelocate(array : Array(Int32), maxLength : Int32) : Int32
      curNode : Int32 = array.size - 2
      curDepth : Int32 = 1
      lastNode : Int32 = 0

      while curDepth < (maxLength - 1) && curNode > 1
        curNode = first(array, curNode - 1, 0)
        curDepth += 1
      end

      curNode
    end

    # A final allocation pass with no code length limit.
    private def allocateNodeLengths(array : Array(Int32)) : Nil
      firstNode : Int32 = array.size - 2
      nextNode : Int32 = array.size - 1
      currentDepth : Int32 = 1
      availableNodes : Int32 = 2
      lastNode : Int32 = 0

      while availableNodes > 0
        lastNode = firstNode
        firstNode = first(array, lastNode - 1, 0)

        (availableNodes - (lastNode - firstNode)).downto(1) do |_|
          array[nextNode] = currentDepth
          nextNode -= 1
        end

        availableNodes = (lastNode - firstNode) << 1
        currentDepth += 1
      end
    end

    # A final allocation pass that relocates nodes in order to achieve a maximum
    # code length limit.
    private def allocateNodeLengthsWithRelocation(array : Array(Int32),
                                                  nodesToMove : Int32, insertDepth : Int32) : Nil
      firstNode : Int32 = array.size - 2
      nextNode : Int32 = array.size - 1
      currentDepth : Int32 = (insertDepth == 1 ? 2 : 1)
      nodesLeftToMove = (insertDepth == 1 ? nodesToMove - 2 : nodesToMove)
      availableNodes : Int32 = currentDepth << 1
      lastNode : Int32 = 0
      offset : Int32 = 0

      while availableNodes > 0
        lastNode = firstNode
        firstNode = if firstNode <= nodesToMove
                      firstNode
                    else
                      first(array, lastNode - 1, nodesToMove)
                    end

        offset = 0
        if currentDepth >= insertDepth
          offset = Math.min(nodesLeftToMove, 1 << (currentDepth - insertDepth))
        elsif currentDepth == insertDepth - 1
          offset = 1
          firstNode += 1 if array[firstNode] == lastNode
        end

        (availableNodes - (lastNode - firstNode + offset)).downto(1) do |_|
          array[nextNode] = currentDepth
          nextNode -= 1
        end

        nodesLeftToMove -= offset
        availableNodes = (lastNode - firstNode + offset) << 1
        currentDepth += 1
      end
    end

    private def significantBits(x : Int32) : Int32
      n : Int32 = 0
      while x > 0
        x >>= 1
        n += 1
      end
      n
    end
  end
end