libremiliacr: Artifact [45e22f8707]

Artifact 45e22f87073485f8ec7243681f09f9a879575d036d6f59c7dfe7a132232b3e1f:

File src/remilib/digest/xxhash.cr — part of check-in [5841ed07b4] at 2024-01-05 07:26:23 on branch trunk — Copyright update time (user: alexa size: 10768)
#### libremiliacr
#### Copyright(C) 2020-2024 Remilia Scarlet <remilia@posteo.jp>
####
#### This program is free software: you can redistribute it and/or modify
#### it under the terms of the GNU General Public License as published
#### the Free Software Foundation, either version 3 of the License, or
#### (at your option) any later version.
####
#### This program is distributed in the hope that it will be useful,
#### but WITHOUT ANY WARRANTY; without even the implied warranty of
#### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the
#### GNU General Public License for more details.
####
#### You should have received a copy of the GNU General Public License
#### along with this program.If not, see<http:####www.gnu.org/licenses/.>
require "./xxhash/*"

####
#### xxHash Implementation, public interface.
####
#### Remi: xxHash is meant to be a very fast hash.  As such, the internal
#### implementations (in the xxhash/ directory, and the streaming
#### implementations below) use a lot of unsafe operations to save every last
#### cycle.
####

###=============================================================================
###
### xxHash XXH3 Algorithm (64-bit)
###

# An implementation of xxHash that produces 64-bit hashes using the XXH3
# algorithm.
class RemiLib::Digest::XXHash3
  # Creates a new `XXHash3` instance.
  private def initialize
  end

  # Calculates the hash for *buf* and returns it.
  def self.calculate(buf : Bytes|Array(UInt8), seed : UInt64 = 0) : UInt64
    slc = case buf
          when Bytes then buf
          else Slice(UInt8).new(buf.to_unsafe, buf.size)
          end
    getHash(slc, seed)
  end

  # Internal driver.
  @[AlwaysInline]
  private def self.getHash(input : Bytes, seed : UInt64) : UInt64
    secret = Slice(UInt8).new(XXH3_SECRET.to_unsafe, XXH3_SECRET.size)
    xxh64BitsInternal(input, seed, secret)
  end
end

###=============================================================================
###
### xxHash XXH64 Algorithm
###

# An implementation of xxHash that produces 64-bit hashes using the XXH64
# algorithm.
class RemiLib::Digest::XXHash64
  # Total length hashed.  This is always 64-bit.
  @totalLen : UInt64 = 0

  # Accumulator lanes
  @v1 : UInt64 = 0
  @v2 : UInt64 = 0
  @v3 : UInt64 = 0
  @v4 : UInt64 = 0

  # Internal buffer for partial reads.  Treated as an Slice(UInt8) of 32
  # elements.
  @mem : Slice(UInt64) = Slice(UInt64).new(4, 0u64)

  # Amount of data in @mem.
  @memSize : UInt64 = 0

  # Last computed hash.
  @hash : UInt64? = nil

  # Creates a new `XXHash64` instance that can be used to incrementally compute
  # hashes.
  def initialize(@seed : UInt64 = 0)
    reset(@seed)
  end

  # Updates the current running hash using the bytes in *buffer*.
  def update(buffer : Bytes|Array(UInt8)) : Nil
    # Remi: This is actually taken from the original C code.  See
    # XXH64_update().
    return if buffer.empty?

    @hash = nil
    len = buffer.size
    inputPtr : PUInt8 = buffer.to_unsafe
    finish : PUInt8 = inputPtr + len
    memPtr : PUInt64 = @mem.to_unsafe

    @totalLen = @totalLen &+ len

    if @memSize &+ len < 32
      # Fill in temp buffer.
      (memPtr.unsafe_as(PUInt8) + @memSize).copy_from(inputPtr, len)
      @memSize = @memSize &+ len
      return
    end

    if @memSize != 0
      # Temp buffer is full.
      (memPtr.unsafe_as(PUInt8) + @memSize).copy_from(inputPtr, 32 &- @memSize)

      {% for i in 1..4 %}
        @v{{i}} = XXHash64.round(@v{{i}}, XXHash64.readLE64((memPtr + {{i - 1}}).unsafe_as(PUInt8)))
      {% end %}
      inputPtr += 32 &- @memSize
      @memSize = 0
    end

    if (inputPtr + 32) <= finish
      limit : PUInt8 = finish - 32

      loop do
        {% for i in 1..4 %}
          @v{{i}} = XXHash64.round(@v{{i}}, XXHash64.readLE64(inputPtr))
          inputPtr += 8
        {% end %}
        break unless inputPtr <= limit
      end
    end

    if inputPtr < finish
      memPtr.unsafe_as(PUInt8).copy_from(inputPtr, finish - inputPtr)
      @memSize = (finish - inputPtr).to_u64!
    end
  end

  # Returns the current hash value.
  def hash : UInt64
    # If we have a hash memoized, just return it.
    @hash.try { |h| return h }

    h64 : UInt64 = 0
    if @totalLen >= 32
      h64 = XXHash64.rotl64(@v1, 1) &+
            XXHash64.rotl64(@v2, 7) &+
            XXHash64.rotl64(@v3, 12) &+
            XXHash64.rotl64(@v4, 18)
      {% for i in 1..4 %}
        h64 = XXHash64.mergeRound(h64, @v{{i}})
      {% end %}
    else
      h64 = @seed &+ XXH_PRIME64_5
    end

    h64 = h64 &+ @totalLen
    @hash = XXHash64.xxhFinalize(h64, @mem.to_unsafe.unsafe_as(PUInt8), @totalLen)
    @hash.not_nil!
  end

  # Resets this instance to its initial state.  This can also be used to change
  # the seed that is used.
  def reset(@seed : UInt64 = 0) : Nil
    @v1 = @seed &+ XXH_PRIME64_1 &+ XXH_PRIME64_2
    @v2 = @seed &+ XXH_PRIME64_2
    @v3 = @seed
    @v4 = @seed &- XXH_PRIME64_1
    @totalLen = 0
    @mem.fill(0u64)
    @memSize = 0
    @hash = nil
  end

  # Calculates the hash by reading from *io* until the end of the stream, then
  # returns the calculated hash.
  def self.calculate(io : IO, bufSize : Int = 4096) : UInt64
    hasher = RemiLib::Digest::XXHash64.new
    buf = Slice(UInt8).new(4096, 0u8)
    while (numRead = io.read(buf)) > 0
      hasher.update(buf[...numRead])
    end
    hasher.hash
  end

  # Internal driver.
  @[AlwaysInline]
  private def self.getHash(input : Bytes, seed : UInt64) : UInt64
    xxh64(input, seed)
  end

  # Calculates the hash for *buf* and returns it.
  def self.calculate(buf : Bytes|Array(UInt8), seed : UInt64 = 0)
    slc = case buf
          when Bytes then buf
          else Slice(UInt8).new(buf.to_unsafe, buf.size)
          end
    getHash(slc, seed)
  end
end

###=============================================================================
###
### xxHash XXH32 Algorithm
###

# An implementation of xxHash that produces 32-bit hashes using the XXH32
# algorithm.
class RemiLib::Digest::XXHash32
  # Total length hashed, modulo 2^32
  @totalLen : UInt32 = 0

  # Whether the hash is >= 16 (handles @totalLen overflow)
  @largeLen : UInt32 = 0

  # Accumulator lanes
  @v1 : UInt32 = 0
  @v2 : UInt32 = 0
  @v3 : UInt32 = 0
  @v4 : UInt32 = 0

  # Internal buffer for partial reads.  Treated as an Slice(UInt8) of 16
  # elements.
  @mem32 : Slice(UInt32) = Slice(UInt32).new(4, 0u32)

  # Amount of data in @mem32.
  @memSize : UInt32 = 0

  # Last computed hash.
  @hash : UInt32? = nil

  # Creates a new `XXHash32` instance that can be used to incrementally compute
  # hashes.
  def initialize(@seed : UInt32 = 0)
    reset(@seed)
  end

  # Updates the current running hash using the bytes in *buffer*.
  def update(buffer : Bytes|Array(UInt8)) : Nil
    # Remi: This is actually taken from the original C code.  See
    # XXH32_update().
    return if buffer.empty?

    @hash = nil
    len = buffer.size
    inputPtr : PUInt8 = buffer.to_unsafe
    finish : PUInt8 = inputPtr + len
    mem32Ptr : PUInt32 = @mem32.to_unsafe

    @totalLen = @totalLen &+ len
    @largeLen |= ((len >= 16 ? 1u32 : 0u32) | (@totalLen >= 16 ? 1u32 : 0u32))

    if @memSize + len < 16
      # Fill in the temp buffer.
      (mem32Ptr.unsafe_as(PUInt8) + @memSize).copy_from(inputPtr, len)
      @memSize = @memSize &+ len
      return
    end

    if @memSize != 0
      # There is some data left from the previous update.
      (mem32Ptr.unsafe_as(PUInt8) + @memSize).copy_from(inputPtr, 16 &- @memSize)
      p32 : PUInt8  = mem32Ptr.unsafe_as(PUInt8)

      {% for i in 1..4 %}
        @v{{i}} = XXHash32.round(@v{{i}}, XXHash32.readLE32(p32))
        p32 += 4
      {% end %}
      inputPtr += 16 &- @memSize
      @memSize = 0
    end

    if inputPtr <= (finish - 16)
      limit : PUInt8 = finish - 16

      loop do
        {% for i in 1..4 %}
          @v{{i}} = XXHash32.round(@v{{i}}, XXHash32.readLE32(inputPtr))
          inputPtr += 4
        {% end %}
        break unless inputPtr <= limit
      end
    end

    if inputPtr < finish
      mem32Ptr.unsafe_as(PUInt8).copy_from(inputPtr, finish - inputPtr)
      @memSize = (finish - inputPtr).to_u32!
    end
  end

  # Returns the current hash value.
  def hash : UInt32
    # If we have a hash memoized, just return it.
    @hash.try { |h| return h }

    h32 : UInt32 = if @largeLen != 0
                     XXHash32.rotl32(@v1, 1) &+
                     XXHash32.rotl32(@v2, 7) &+
                     XXHash32.rotl32(@v3, 12) &+
                     XXHash32.rotl32(@v4, 18)
                   else
                     @seed &+ XXH_PRIME32_5
                   end
    h32 = h32 &+ @totalLen
    @hash = XXHash32.xxhFinalize(h32, @mem32.to_unsafe.unsafe_as(PUInt8), @memSize)
    @hash.not_nil!
  end

  # Resets this instance to its initial state.  This can also be used to change
  # the seed that is used.
  def reset(@seed : UInt32 = 0) : Nil
    @v1 = @seed &+ XXH_PRIME32_1 &+ XXH_PRIME32_2
    @v2 = @seed &+ XXH_PRIME32_2
    @v3 = @seed
    @v4 = @seed &- XXH_PRIME32_1
    @totalLen = 0
    @largeLen = 0
    @mem32.fill(0u32)
    @memSize = 0
    @hash = nil
  end

  # Calculates the hash for *buf* and returns it.
  def self.calculate(buf : Bytes|Array(UInt8), seed : UInt32 = 0)
    slc = case buf
          when Bytes then buf
          else Slice(UInt8).new(buf.to_unsafe, buf.size)
          end
    getHash(slc, seed)
  end

  # Calculates the hash by reading from *io* until the end of the stream, then
  # returns the calculated hash.
  def self.calculate(io : IO, bufSize : Int = 4096) : UInt32
    hasher = RemiLib::Digest::XXHash32.new
    buf = Slice(UInt8).new(4096, 0u8)
    while (numRead = io.read(buf)) > 0
      hasher.update(buf[...numRead])
    end
    hasher.hash
  end

  # Internal driver.
  @[AlwaysInline]
  private def self.getHash(input : Bytes, seed : UInt32) : UInt32
    xxh32(input, seed)
  end
end

###=============================================================================
###
### xxHash XXH3 Algorithm (128-bit)
###

# An implementation of xxHash that produces 128-bit hashes using the XXH3
# algorithm.
class RemiLib::Digest::XXHash128
  # Creates a new `XXHash128` instance.
  private def initialize
  end

  # Calculates the hash for *buf* and returns it.
  def self.calculate(buf : Bytes|Array(UInt8), seed : UInt64 = 0) : UInt128
    slc = case buf
          when Bytes then buf
          else Slice(UInt8).new(buf.to_unsafe, buf.size)
          end
    getHash(slc, seed)
  end

  # Internal driver.
  @[AlwaysInline]
  private def self.getHash(input : Bytes, seed : UInt64) : UInt128
    secret = Slice(UInt8).new(XXH3_SECRET.to_unsafe, XXH3_SECRET.size)
    xxh3Internal128Bit(input, seed, secret)
  end
end