Login
spookyhash.cr at tip
Login

File src/remilib/digest/spookyhash.cr from the latest check-in


     1
     2
     3
     4
     5
     6
     7
     8
     9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
   100
   101
   102
   103
   104
   105
   106
   107
   108
   109
   110
   111
   112
   113
   114
   115
   116
   117
   118
   119
   120
   121
   122
   123
   124
   125
   126
   127
   128
   129
   130
   131
   132
   133
   134
   135
   136
   137
   138
   139
   140
   141
   142
   143
   144
   145
   146
   147
   148
   149
   150
   151
   152
   153
   154
   155
   156
   157
   158
   159
   160
   161
   162
   163
   164
   165
   166
   167
   168
   169
   170
   171
   172
   173
   174
   175
   176
   177
   178
   179
   180
   181
   182
   183
   184
   185
   186
   187
   188
   189
   190
   191
   192
   193
   194
   195
   196
   197
   198
   199
   200
   201
   202
   203
   204
   205
   206
   207
   208
   209
   210
   211
   212
   213
   214
   215
   216
   217
   218
   219
   220
   221
   222
   223
   224
   225
   226
   227
   228
   229
   230
   231
   232
   233
   234
   235
   236
   237
   238
   239
   240
   241
   242
   243
   244
#### SpookyHash
#### Copyright (c) 2023-2024, Remilia Scarlet
#### Copyright (c) 2015, Guillaume Voirin
#### BSD 3-Clause License
####
#### All rights reserved.
####
#### Redistribution and use in source and binary forms, with or without
#### modification, are permitted provided that the following conditions are met:
####
####     Redistributions of source code must retain the above copyright notice,
####     this list of conditions and the following disclaimer.
####
####     Redistributions in binary form must reproduce the above copyright
####     notice, this list of conditions and the following disclaimer in the
####     documentation and/or other materials provided with the distribution.
####
####     Neither the name of the copyright holder nor the names of its
####     contributors may be used to endorse or promote products derived from
####     this software without specific prior written permission.
####
#### THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
#### AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
#### IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
#### ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
#### LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
#### CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
#### SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
#### INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
#### CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
#### ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
#### POSSIBILITY OF SUCH DAMAGE.
require "./spookyhash/*"

####
#### SpookyHash Implementation, public interface.
####
#### Remi: SpookyHash is meant to be a very fast hash.  As such, the internal
#### implementation (in the SpookyHash/ directory) use a lot of unsafe
#### operations to save every last cycle.
####

# An implementation of SpookyHash that produces 128-, 64-, and 32-bit hashes.
class RemiLib::Digest::SpookyHash
  @hash : UInt128? = nil

  # Creates a new `SpookyHash` instance.
  def initialize
  end

  # Creates a new `SpookyHash` instance.  *seed1* is the first eight bytes of a
  # custom seed, while *seed2* is the last eight bytes of a custom seed.
  def initialize(seed1 : UInt64, seed2 : UInt64)
    @state[0] = seed1
    @state[1] = seed2
  end

  {% begin %}
    def self.calculate128(input : Bytes, inputHash : UInt128 = 0) : UInt128
      hash1, hash2 = splitU128(inputHash)
      hash1Ptr : PUInt64 = pointerof(hash1)
      hash2Ptr : PUInt64 = pointerof(hash2)
      inputPtr : PUInt8 = input.to_unsafe
      len = input.size

      if len < SPOOKYHASH_BUFFER_SIZE
        SpookyHash.short(inputPtr, input.size, hash1Ptr, hash2Ptr)
        return makeU128(hash1, hash2)
      end

      {% for i in 0..11 %}
        h{{i}} : UInt64 = 0
        h{{i}}Ptr : PUInt64 = pointerof(h{{i}})
      {% end %}

      buf : Slice(UInt64) = Slice(UInt64).new(SPOOKYHASH_VARIABLES, 0u64)
      bufPtr : PUInt64 = buf.to_unsafe
      u : SpookyHashLib::DataUnion128 = SpookyHashLib::DataUnion128.new
      remainder : Int32 = 0

      h0 = h3 = h6 = h9 = hash1
      h1 = h4 = h7 = h10 = hash2
      h2 = h5 = h8 = h11 = SPOOKYHASH_CONSTANT

      u.p8 = inputPtr
      finish : PUInt64 = u.p64 + (len.tdiv(SPOOKYHASH_BLOCK_SIZE) &* SPOOKYHASH_VARIABLES)

      if true || !bitflag?(u.i, 0x07)
        while u.p64 < finish
          mix(u.p64, h0Ptr, h1Ptr, h2Ptr, h3Ptr, h4Ptr, h5Ptr, h6Ptr, h7Ptr, h8Ptr, h9Ptr, h10Ptr, h11Ptr)
          u.p64 += SPOOKYHASH_VARIABLES
        end
      else
        while u.p64 < finish
          bufPtr.copy_from(u.p64, SPOOKYHASH_BLOCK_SIZE)
          mix(bufPtr, h0Ptr, h1Ptr, h2Ptr, h3Ptr, h4Ptr, h5Ptr, h6Ptr, h7Ptr, h8Ptr, h9Ptr, h10Ptr, h11Ptr)
          u.p64 += SPOOKYHASH_VARIABLES
        end
      end

      remainder = len &- (finish.unsafe_as(PUInt8) - inputPtr)
      bufPtr.copy_from(finish, remainder)

      # Hacky memset-ish thing
      bufPtrP8 = bufPtr.unsafe_as(PUInt8) + remainder
      (SPOOKYHASH_BLOCK_SIZE &- remainder).times do |i|
        bufPtrP8[i] = 0u8
      end

      bufPtr.unsafe_as(PUInt8)[SPOOKYHASH_BLOCK_SIZE &- 1] = remainder.to_u8!

      finish(bufPtr, h0Ptr, h1Ptr, h2Ptr, h3Ptr, h4Ptr, h5Ptr, h6Ptr, h7Ptr, h8Ptr, h9Ptr, h10Ptr, h11Ptr)
      makeU128(h0, h1)
    end
  {% end %}

  def self.calculate64(input : Bytes, seed : UInt64 = 0) : UInt64
    inputHash = makeU128(seed, seed)
    calculate128(input, inputHash).to_u64!
  end

  def self.calculate32(input : Bytes, seed : UInt32 = 0) : UInt32
    inputHash = makeU128(seed.to_u64!, seed.to_u64!)
    calculate128(input, inputHash).to_u32!
  end


  {% begin %}
    def update(input : Bytes) : Nil
      @hash = nil
      inputPtr : PUInt8 = input.to_unsafe
      len = input.size
      newLen : Int32 = len &+ @remainder
      dataPtr : PUInt64 = @data.to_unsafe

      if newLen < SPOOKYHASH_BUFFER_SIZE
        (dataPtr.unsafe_as(PUInt8) + @remainder).copy_from(inputPtr, len)
        @length = len.to_i64! &+ @length
        @remainder = newLen.to_u8!
        return
      end

      {% for i in 0..11 %}
        h{{i}} : UInt64 = 0
        h{{i}}Ptr : PUInt64 = pointerof(h{{i}})
      {% end %}
      remainder : UInt8 = 0
      u : SpookyHashLib::DataUnion128 = SpookyHashLib::DataUnion128.new

      if @length < SPOOKYHASH_BUFFER_SIZE
        h0 = h3 = h6 = h9 = @state.unsafe_fetch(0)
        h1 = h4 = h7 = h10 = @state.unsafe_fetch(1)
        h2 = h5 = h8 = h11 = SPOOKYHASH_CONSTANT
      else
        {% for i in 0..11 %}
          h{{i}} = @state.unsafe_fetch({{i}})
        {% end %}
      end

      @length = len.to_i64! &+ @length

      if @remainder != 0
        prefix : UInt8 = (SPOOKYHASH_BUFFER_SIZE &- @remainder).to_u8!
        (dataPtr.unsafe_as(PUInt8) + @remainder).copy_from(inputPtr, prefix)
        u.p64 = dataPtr
        SpookyHash.mix(u.p64, h0Ptr, h1Ptr, h2Ptr, h3Ptr, h4Ptr, h5Ptr, h6Ptr, h7Ptr, h8Ptr, h9Ptr, h10Ptr, h11Ptr)
        SpookyHash.mix(u.p64 + SPOOKYHASH_VARIABLES,
                       h0Ptr, h1Ptr, h2Ptr, h3Ptr, h4Ptr, h5Ptr, h6Ptr, h7Ptr, h8Ptr, h9Ptr, h10Ptr, h11Ptr)
        u.p8 = inputPtr + prefix
        len = len &- prefix
      else
        u.p8 = inputPtr
      end

      finish : PUInt64 = u.p64 + (len.tdiv(SPOOKYHASH_BLOCK_SIZE) &* SPOOKYHASH_VARIABLES)
      remainder = (len.to_u64! &- (finish.unsafe_as(PUInt8) - u.p8)).to_u8!
      unless bitflag?(u.i, 0x07)
        while u.p64 < finish
          SpookyHash.mix(u.p64, h0Ptr, h1Ptr, h2Ptr, h3Ptr, h4Ptr, h5Ptr, h6Ptr, h7Ptr, h8Ptr, h9Ptr, h10Ptr, h11Ptr)
          u.p64 += SPOOKYHASH_VARIABLES
        end
      else
        while u.p64 < finish
          dataPtr.unsafe_as(PUInt8).copy_from(u.p8, SPOOKYHASH_BLOCK_SIZE)
          SpookyHash.mix(dataPtr, h0Ptr, h1Ptr, h2Ptr, h3Ptr, h4Ptr, h5Ptr, h6Ptr, h7Ptr, h8Ptr, h9Ptr, h10Ptr, h11Ptr)
          u.p64 += SPOOKYHASH_VARIABLES
        end
      end

      @remainder = remainder
      dataPtr.copy_from(finish, remainder)

      {% for i in 0..11 %}
        @state.unsafe_put({{i}}, h{{i}})
      {% end %}
    end
  {% end %}

  {% begin %}
    def hash : UInt128
      # Return memoized value if possible.
      @hash.try { |ret| return ret }

      dataPtr : PUInt64 = @data.to_unsafe
      if @length < SPOOKYHASH_BUFFER_SIZE
        hash1 : UInt64 = @state.unsafe_fetch(0)
        hash2 : UInt64 = @state.unsafe_fetch(1)
        SpookyHash.short(dataPtr.unsafe_as(PUInt8), @length, pointerof(hash1), pointerof(hash2))
        @hash = makeU128(hash1, hash2)
        return @hash.not_nil!
      end

      remainder : UInt8 = @remainder
      {% for i in 0..11 %}
        h{{i}} : UInt64 = @state.unsafe_fetch({{i}})
        h{{i}}Ptr : PUInt64 = pointerof(h{{i}})
      {% end %}

      if remainder >= SPOOKYHASH_BLOCK_SIZE
        SpookyHash.mix(dataPtr, h0Ptr, h1Ptr, h2Ptr, h3Ptr, h4Ptr, h5Ptr, h6Ptr, h7Ptr, h8Ptr, h9Ptr, h10Ptr, h11Ptr)
        dataPtr += SPOOKYHASH_VARIABLES
        remainder = remainder &- SPOOKYHASH_BLOCK_SIZE
      end

      dataPtrRem : PUInt8 = dataPtr.unsafe_as(PUInt8) + remainder
      (SPOOKYHASH_BLOCK_SIZE &- remainder).times do |i|
        dataPtrRem[i] = 0
      end
      dataPtr.unsafe_as(PUInt8)[SPOOKYHASH_BLOCK_SIZE &- 1] = remainder

      SpookyHash.finish(dataPtr, h0Ptr, h1Ptr, h2Ptr, h3Ptr, h4Ptr, h5Ptr, h6Ptr, h7Ptr, h8Ptr, h9Ptr, h10Ptr, h11Ptr)
      @hash = makeU128(h0, h1)
      @hash.not_nil!
    end
  {% end %}

  def hash64 : UInt64
    hash.to_u64!
  end

  def hash32 : UInt32
    hash.to_u32!
  end
end