#### libremiliacr
#### Copyright(C) 2020-2024 Remilia Scarlet <remilia@posteo.jp>
####
#### This program is free software: you can redistribute it and/or modify
#### it under the terms of the GNU General Public License as published
#### the Free Software Foundation, either version 3 of the License, or
#### (at your option) any later version.
####
#### This program is distributed in the hope that it will be useful,
#### but WITHOUT ANY WARRANTY; without even the implied warranty of
#### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the
#### GNU General Public License for more details.
####
#### You should have received a copy of the GNU General Public License
#### along with this program.If not, see<http:####www.gnu.org/licenses/.>
require "./xxhash/*"
####
#### xxHash Implementation, public interface.
####
#### Remi: xxHash is meant to be a very fast hash. As such, the internal
#### implementations (in the xxhash/ directory, and the streaming
#### implementations below) use a lot of unsafe operations to save every last
#### cycle.
####
###=============================================================================
###
### xxHash XXH3 Algorithm (64-bit)
###
# An implementation of xxHash that produces 64-bit hashes using the XXH3
# algorithm.
class RemiLib::Digest::XXHash3
# Creates a new `XXHash3` instance.
private def initialize
end
# Calculates the hash for *buf* and returns it.
def self.calculate(buf : Bytes|Array(UInt8), seed : UInt64 = 0) : UInt64
slc = case buf
when Bytes then buf
else Slice(UInt8).new(buf.to_unsafe, buf.size)
end
getHash(slc, seed)
end
# Internal driver.
@[AlwaysInline]
private def self.getHash(input : Bytes, seed : UInt64) : UInt64
secret = Slice(UInt8).new(XXH3_SECRET.to_unsafe, XXH3_SECRET.size)
xxh64BitsInternal(input, seed, secret)
end
end
###=============================================================================
###
### xxHash XXH64 Algorithm
###
# An implementation of xxHash that produces 64-bit hashes using the XXH64
# algorithm.
class RemiLib::Digest::XXHash64
# Total length hashed. This is always 64-bit.
@totalLen : UInt64 = 0
# Accumulator lanes
@v1 : UInt64 = 0
@v2 : UInt64 = 0
@v3 : UInt64 = 0
@v4 : UInt64 = 0
# Internal buffer for partial reads. Treated as an Slice(UInt8) of 32
# elements.
@mem : Slice(UInt64) = Slice(UInt64).new(4, 0u64)
# Amount of data in @mem.
@memSize : UInt64 = 0
# Last computed hash.
@hash : UInt64? = nil
# Creates a new `XXHash64` instance that can be used to incrementally compute
# hashes.
def initialize(@seed : UInt64 = 0)
reset(@seed)
end
# Updates the current running hash using the bytes in *buffer*.
def update(buffer : Bytes|Array(UInt8)) : Nil
# Remi: This is actually taken from the original C code. See
# XXH64_update().
return if buffer.empty?
@hash = nil
len = buffer.size
inputPtr : PUInt8 = buffer.to_unsafe
finish : PUInt8 = inputPtr + len
memPtr : PUInt64 = @mem.to_unsafe
@totalLen = @totalLen &+ len
if @memSize &+ len < 32
# Fill in temp buffer.
(memPtr.unsafe_as(PUInt8) + @memSize).copy_from(inputPtr, len)
@memSize = @memSize &+ len
return
end
if @memSize != 0
# Temp buffer is full.
(memPtr.unsafe_as(PUInt8) + @memSize).copy_from(inputPtr, 32 &- @memSize)
{% for i in 1..4 %}
@v{{i}} = XXHash64.round(@v{{i}}, XXHash64.readLE64((memPtr + {{i - 1}}).unsafe_as(PUInt8)))
{% end %}
inputPtr += 32 &- @memSize
@memSize = 0
end
if (inputPtr + 32) <= finish
limit : PUInt8 = finish - 32
loop do
{% for i in 1..4 %}
@v{{i}} = XXHash64.round(@v{{i}}, XXHash64.readLE64(inputPtr))
inputPtr += 8
{% end %}
break unless inputPtr <= limit
end
end
if inputPtr < finish
memPtr.unsafe_as(PUInt8).copy_from(inputPtr, finish - inputPtr)
@memSize = (finish - inputPtr).to_u64!
end
end
# Returns the current hash value.
def hash : UInt64
# If we have a hash memoized, just return it.
@hash.try { |h| return h }
h64 : UInt64 = 0
if @totalLen >= 32
h64 = XXHash64.rotl64(@v1, 1) &+
XXHash64.rotl64(@v2, 7) &+
XXHash64.rotl64(@v3, 12) &+
XXHash64.rotl64(@v4, 18)
{% for i in 1..4 %}
h64 = XXHash64.mergeRound(h64, @v{{i}})
{% end %}
else
h64 = @seed &+ XXH_PRIME64_5
end
h64 = h64 &+ @totalLen
@hash = XXHash64.xxhFinalize(h64, @mem.to_unsafe.unsafe_as(PUInt8), @totalLen)
@hash.not_nil!
end
# Resets this instance to its initial state. This can also be used to change
# the seed that is used.
def reset(@seed : UInt64 = 0) : Nil
@v1 = @seed &+ XXH_PRIME64_1 &+ XXH_PRIME64_2
@v2 = @seed &+ XXH_PRIME64_2
@v3 = @seed
@v4 = @seed &- XXH_PRIME64_1
@totalLen = 0
@mem.fill(0u64)
@memSize = 0
@hash = nil
end
# Calculates the hash by reading from *io* until the end of the stream, then
# returns the calculated hash.
def self.calculate(io : IO, bufSize : Int = 4096) : UInt64
hasher = RemiLib::Digest::XXHash64.new
buf = Slice(UInt8).new(4096, 0u8)
while (numRead = io.read(buf)) > 0
hasher.update(buf[...numRead])
end
hasher.hash
end
# Internal driver.
@[AlwaysInline]
private def self.getHash(input : Bytes, seed : UInt64) : UInt64
xxh64(input, seed)
end
# Calculates the hash for *buf* and returns it.
def self.calculate(buf : Bytes|Array(UInt8), seed : UInt64 = 0)
slc = case buf
when Bytes then buf
else Slice(UInt8).new(buf.to_unsafe, buf.size)
end
getHash(slc, seed)
end
end
###=============================================================================
###
### xxHash XXH32 Algorithm
###
# An implementation of xxHash that produces 32-bit hashes using the XXH32
# algorithm.
class RemiLib::Digest::XXHash32
# Total length hashed, modulo 2^32
@totalLen : UInt32 = 0
# Whether the hash is >= 16 (handles @totalLen overflow)
@largeLen : UInt32 = 0
# Accumulator lanes
@v1 : UInt32 = 0
@v2 : UInt32 = 0
@v3 : UInt32 = 0
@v4 : UInt32 = 0
# Internal buffer for partial reads. Treated as an Slice(UInt8) of 16
# elements.
@mem32 : Slice(UInt32) = Slice(UInt32).new(4, 0u32)
# Amount of data in @mem32.
@memSize : UInt32 = 0
# Last computed hash.
@hash : UInt32? = nil
# Creates a new `XXHash32` instance that can be used to incrementally compute
# hashes.
def initialize(@seed : UInt32 = 0)
reset(@seed)
end
# Updates the current running hash using the bytes in *buffer*.
def update(buffer : Bytes|Array(UInt8)) : Nil
# Remi: This is actually taken from the original C code. See
# XXH32_update().
return if buffer.empty?
@hash = nil
len = buffer.size
inputPtr : PUInt8 = buffer.to_unsafe
finish : PUInt8 = inputPtr + len
mem32Ptr : PUInt32 = @mem32.to_unsafe
@totalLen = @totalLen &+ len
@largeLen |= ((len >= 16 ? 1u32 : 0u32) | (@totalLen >= 16 ? 1u32 : 0u32))
if @memSize + len < 16
# Fill in the temp buffer.
(mem32Ptr.unsafe_as(PUInt8) + @memSize).copy_from(inputPtr, len)
@memSize = @memSize &+ len
return
end
if @memSize != 0
# There is some data left from the previous update.
(mem32Ptr.unsafe_as(PUInt8) + @memSize).copy_from(inputPtr, 16 &- @memSize)
p32 : PUInt8 = mem32Ptr.unsafe_as(PUInt8)
{% for i in 1..4 %}
@v{{i}} = XXHash32.round(@v{{i}}, XXHash32.readLE32(p32))
p32 += 4
{% end %}
inputPtr += 16 &- @memSize
@memSize = 0
end
if inputPtr <= (finish - 16)
limit : PUInt8 = finish - 16
loop do
{% for i in 1..4 %}
@v{{i}} = XXHash32.round(@v{{i}}, XXHash32.readLE32(inputPtr))
inputPtr += 4
{% end %}
break unless inputPtr <= limit
end
end
if inputPtr < finish
mem32Ptr.unsafe_as(PUInt8).copy_from(inputPtr, finish - inputPtr)
@memSize = (finish - inputPtr).to_u32!
end
end
# Returns the current hash value.
def hash : UInt32
# If we have a hash memoized, just return it.
@hash.try { |h| return h }
h32 : UInt32 = if @largeLen != 0
XXHash32.rotl32(@v1, 1) &+
XXHash32.rotl32(@v2, 7) &+
XXHash32.rotl32(@v3, 12) &+
XXHash32.rotl32(@v4, 18)
else
@seed &+ XXH_PRIME32_5
end
h32 = h32 &+ @totalLen
@hash = XXHash32.xxhFinalize(h32, @mem32.to_unsafe.unsafe_as(PUInt8), @memSize)
@hash.not_nil!
end
# Resets this instance to its initial state. This can also be used to change
# the seed that is used.
def reset(@seed : UInt32 = 0) : Nil
@v1 = @seed &+ XXH_PRIME32_1 &+ XXH_PRIME32_2
@v2 = @seed &+ XXH_PRIME32_2
@v3 = @seed
@v4 = @seed &- XXH_PRIME32_1
@totalLen = 0
@largeLen = 0
@mem32.fill(0u32)
@memSize = 0
@hash = nil
end
# Calculates the hash for *buf* and returns it.
def self.calculate(buf : Bytes|Array(UInt8), seed : UInt32 = 0)
slc = case buf
when Bytes then buf
else Slice(UInt8).new(buf.to_unsafe, buf.size)
end
getHash(slc, seed)
end
# Calculates the hash by reading from *io* until the end of the stream, then
# returns the calculated hash.
def self.calculate(io : IO, bufSize : Int = 4096) : UInt32
hasher = RemiLib::Digest::XXHash32.new
buf = Slice(UInt8).new(4096, 0u8)
while (numRead = io.read(buf)) > 0
hasher.update(buf[...numRead])
end
hasher.hash
end
# Internal driver.
@[AlwaysInline]
private def self.getHash(input : Bytes, seed : UInt32) : UInt32
xxh32(input, seed)
end
end
###=============================================================================
###
### xxHash XXH3 Algorithm (128-bit)
###
# An implementation of xxHash that produces 128-bit hashes using the XXH3
# algorithm.
class RemiLib::Digest::XXHash128
# Creates a new `XXHash128` instance.
private def initialize
end
# Calculates the hash for *buf* and returns it.
def self.calculate(buf : Bytes|Array(UInt8), seed : UInt64 = 0) : UInt128
slc = case buf
when Bytes then buf
else Slice(UInt8).new(buf.to_unsafe, buf.size)
end
getHash(slc, seed)
end
# Internal driver.
@[AlwaysInline]
private def self.getHash(input : Bytes, seed : UInt64) : UInt128
secret = Slice(UInt8).new(XXH3_SECRET.to_unsafe, XXH3_SECRET.size)
xxh3Internal128Bit(input, seed, secret)
end
end