#### Fossil artifact fc904e8d0e7d6e31ffad3b44b7f6d786c122776f4d485c8e4b18d97793316f84


#### MIT License
####
#### Copyright (c) 2023-2024 Remilia Scarlet
#### Copyright (c) 2018 Melnik Alexander
####
#### Permission is hereby granted, free of charge, to any person obtaining a
#### copy of this software and associated documentation files (the "Software"),
#### to deal in the Software without restriction, including without limitation
#### the rights to use, copy, modify, merge, publish, distribute, sublicense,
#### and/or sell copies of the Software, and to permit persons to whom the
#### Software is furnished to do so, subject to the following conditions:
####
#### The above copyright notice and this permission notice shall be included in
#### all copies or substantial portions of the Software.
####
#### THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#### IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#### FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#### AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#### LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
#### FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
#### DEALINGS IN THE SOFTWARE.

####
#### Common xxHash Methods
####
#### Ported from C#:
#### https://github.com/uranium62/xxHash/tree/6b20e7f7b32dfc29e5019d3d35f5b7270f1656f3
####
#### Remi: It *appears* as though the C# code was actually based on the original
#### xxHash code, though the C# version is under a different license (MIT vs
#### BSD). As such, I've copied the comments from
#### https://github.com/Cyan4973/xxHash/tree/f4bef929aa854e9f52a303c5e58fd52855a0ecfa
####

module RemiLib::Digest
  # Shared constants and low-level bit-manipulation helpers used by the
  # xxHash/XXH3 implementations elsewhere in this library.  Everything here is
  # an internal detail; the class is abstract and private to the module.
  private abstract class XXHashInternal
    # :nodoc:
    # Just for less typing.
    alias PUInt8 = Pointer(UInt8)

    # :nodoc:
    # Just for less typing.
    alias PUInt32 = Pointer(UInt32)

    # :nodoc:
    # Just for less typing.
    alias PUInt64 = Pointer(UInt64)

    # Pseudorandom secret taken directly from FARSH.
    #
    # This is the 192-byte (`XXH3_SECRET_DEFAULT_SIZE`) default secret used by
    # the XXH3 variants when the caller does not provide a custom one.  The
    # bytes must match the upstream xxHash reference exactly or every XXH3
    # digest produced will be wrong.
    XXH3_SECRET = [
      0xb8_u8, 0xfe_u8, 0x6c_u8, 0x39_u8, 0x23_u8, 0xa4_u8, 0x4b_u8, 0xbe_u8,
      0x7c_u8, 0x01_u8, 0x81_u8, 0x2c_u8, 0xf7_u8, 0x21_u8, 0xad_u8, 0x1c_u8,
      0xde_u8, 0xd4_u8, 0x6d_u8, 0xe9_u8, 0x83_u8, 0x90_u8, 0x97_u8, 0xdb_u8,
      0x72_u8, 0x40_u8, 0xa4_u8, 0xa4_u8, 0xb7_u8, 0xb3_u8, 0x67_u8, 0x1f_u8,
      0xcb_u8, 0x79_u8, 0xe6_u8, 0x4e_u8, 0xcc_u8, 0xc0_u8, 0xe5_u8, 0x78_u8,
      0x82_u8, 0x5a_u8, 0xd0_u8, 0x7d_u8, 0xcc_u8, 0xff_u8, 0x72_u8, 0x21_u8,
      0xb8_u8, 0x08_u8, 0x46_u8, 0x74_u8, 0xf7_u8, 0x43_u8, 0x24_u8, 0x8e_u8,
      0xe0_u8, 0x35_u8, 0x90_u8, 0xe6_u8, 0x81_u8, 0x3a_u8, 0x26_u8, 0x4c_u8,
      0x3c_u8, 0x28_u8, 0x52_u8, 0xbb_u8, 0x91_u8, 0xc3_u8, 0x00_u8, 0xcb_u8,
      0x88_u8, 0xd0_u8, 0x65_u8, 0x8b_u8, 0x1b_u8, 0x53_u8, 0x2e_u8, 0xa3_u8,
      0x71_u8, 0x64_u8, 0x48_u8, 0x97_u8, 0xa2_u8, 0x0d_u8, 0xf9_u8, 0x4e_u8,
      0x38_u8, 0x19_u8, 0xef_u8, 0x46_u8, 0xa9_u8, 0xde_u8, 0xac_u8, 0xd8_u8,
      0xa8_u8, 0xfa_u8, 0x76_u8, 0x3f_u8, 0xe3_u8, 0x9c_u8, 0x34_u8, 0x3f_u8,
      0xf9_u8, 0xdc_u8, 0xbb_u8, 0xc7_u8, 0xc7_u8, 0x0b_u8, 0x4f_u8, 0x1d_u8,
      0x8a_u8, 0x51_u8, 0xe0_u8, 0x4b_u8, 0xcd_u8, 0xb4_u8, 0x59_u8, 0x31_u8,
      0xc8_u8, 0x9f_u8, 0x7e_u8, 0xc9_u8, 0xd9_u8, 0x78_u8, 0x73_u8, 0x64_u8,
      0xea_u8, 0xc5_u8, 0xac_u8, 0x83_u8, 0x34_u8, 0xd3_u8, 0xeb_u8, 0xc3_u8,
      0xc5_u8, 0x81_u8, 0xa0_u8, 0xff_u8, 0xfa_u8, 0x13_u8, 0x63_u8, 0xeb_u8,
      0x17_u8, 0x0d_u8, 0xdd_u8, 0x51_u8, 0xb7_u8, 0xf0_u8, 0xda_u8, 0x49_u8,
      0xd3_u8, 0x16_u8, 0x55_u8, 0x26_u8, 0x29_u8, 0xd4_u8, 0x68_u8, 0x9e_u8,
      0x2b_u8, 0x16_u8, 0xbe_u8, 0x58_u8, 0x7d_u8, 0x47_u8, 0xa1_u8, 0xfc_u8,
      0x8f_u8, 0xf8_u8, 0xb8_u8, 0xd1_u8, 0x7a_u8, 0xd0_u8, 0x31_u8, 0xce_u8,
      0x45_u8, 0xcb_u8, 0x3a_u8, 0x8f_u8, 0x95_u8, 0x16_u8, 0x04_u8, 0x28_u8,
      0xaf_u8, 0xd7_u8, 0xfb_u8, 0xca_u8, 0xbb_u8, 0x4b_u8, 0x40_u8, 0x7e_u8
    ]

    # Initial values for the eight accumulator lanes used by the long-input
    # XXH3 code path (one lane per `XXH_ACC_NB`).
    #
    # NOTE(review): this array mixes `UInt32` and `UInt64` constants, so its
    # element type is the union `UInt32 | UInt64`, whereas the upstream C code
    # initializes an array of 64-bit lanes.  Verify that consumers widen each
    # element to `UInt64` before accumulating.
    XXH3_INIT_ACC = [
      XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3,
      XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1
    ]

    # The five 64-bit primes used by XXH64/XXH3 mixing steps.  Values are
    # fixed by the xxHash specification and must not change.
    XXH_PRIME64_1 = 11400714785074694791_u64
    XXH_PRIME64_2 = 14029467366897019727_u64
    XXH_PRIME64_3 = 1609587929392839161_u64
    XXH_PRIME64_4 = 9650029242287828579_u64
    XXH_PRIME64_5 = 2870177450012600261_u64

    # The five 32-bit primes used by XXH32 (and in a few XXH3 spots above).
    # Values are fixed by the xxHash specification and must not change.
    XXH_PRIME32_1 = 2654435761_u32
    XXH_PRIME32_2 = 2246822519_u32
    XXH_PRIME32_3 = 3266489917_u32
    XXH_PRIME32_4 = 668265263_u32
    XXH_PRIME32_5 = 374761393_u32

    # The bare minimum size for a custom secret.
    XXH3_SECRET_SIZE_MIN = 136

    # Default size of the secret buffer (and `XXH3_SECRET`).
    XXH3_SECRET_DEFAULT_SIZE = 192

    # Longest input (in bytes) handled by the XXH3 mid-size code path;
    # inputs larger than this use the striped long-input path.
    XXH3_MIDSIZE_MAX = 240
    # Secret offsets used by the 129..240-byte mid-size path.
    # NOTE(review): semantics taken from the upstream xxHash reference; the
    # consuming code is not visible in this file — confirm against it.
    XXH3_MIDSIZE_STARTOFFSET = 3
    XXH3_MIDSIZE_LASTOFFSET = 17

    # Size in bytes of one input stripe processed per accumulation round.
    XXH_STRIPE_LEN = 64
    # Number of 64-bit accumulator lanes per stripe.
    XXH_ACC_NB = 8 # XXH_STRIPE_LEN.tdiv(sizeof(UInt64))

    # Number of secret bytes consumed at each accumulation.
    XXH_SECRET_CONSUME_RATE = 8

    # Do not align on 8, so that the secret is different from the accumulator.
    XXH_SECRET_MERGEACCS_START = 11

    # Default size of the secret buffer (and `XXH3_SECRET`).
    # NOTE(review): duplicates `XXH3_SECRET_DEFAULT_SIZE` above; both are kept
    # in case code outside this file references either name.
    XXH_SECRET_DEFAULT_SIZE = 192

    # Not aligned on 8, last secret is different from acc & scrambler.
    XXH_SECRET_LASTACC_START = 7

    #protected MM_SHUFFLE_0_3_0_1 = 0b0011_0001_u8
    #protected MM_SHUFFLE_1_0_3_2 = 0b0100_1110_u8

    # Returns the lower 64 bits of the 128-bit value in `val` as a `UInt64`.
    @[AlwaysInline]
    protected def self.low64(val : UInt128) : UInt64
      # `to_u64!` truncates, keeping only the low 64 bits.
      val.to_u64!
    end

    # Returns the upper 64 bits of the 128-bit value in `val` as a `UInt64`.
    @[AlwaysInline]
    protected def self.high64(val : UInt128) : UInt64
      # The mask is redundant with the following right shift (the shifted-out
      # low bits are discarded either way) but is kept for explicitness.
      (val & 0xFFFFFFFFFFFFFFFF0000000000000000_u128).unsafe_shr(64).to_u64!
    end

    # Creates a `UInt128` using `low` for the lower 64 bits and `high` for the
    # upper 64-bits.
    @[AlwaysInline]
    protected def self.makeU128(low : UInt64, high : UInt64) : UInt128
      (high.to_u128! << 64) | low
    end

    # Byte-swaps the 64-bit value *x* (reverses the order of its 8 bytes).
    @[AlwaysInline]
    protected def self.swap64(x : UInt64) : UInt64
      (x.unsafe_shl(56) & 0xff00000000000000_u64) |
      (x.unsafe_shl(40) & 0x00ff000000000000_u64) |
      (x.unsafe_shl(24) & 0x0000ff0000000000_u64) |
      (x.unsafe_shl(8)  & 0x000000ff00000000_u64) |
      (x.unsafe_shr(8)  & 0x00000000ff000000_u64) |
      (x.unsafe_shr(24) & 0x0000000000ff0000_u64) |
      (x.unsafe_shr(40) & 0x000000000000ff00_u64) |
      (x.unsafe_shr(56) & 0x00000000000000ff_u64)
    end

    # Byte-swaps the 32-bit value *x* (reverses the order of its 4 bytes).
    @[AlwaysInline]
    protected def self.swap32(x : UInt32) : UInt32
      (x.unsafe_shl(24) & 0xff000000_u32) |
      (x.unsafe_shl(8)  & 0x00ff0000_u32) |
      (x.unsafe_shr(8)  & 0x0000ff00_u32) |
      (x.unsafe_shr(24) & 0x000000ff_u32)
    end

    # 32-bit rotate left.  *x* is the `UInt32` to be rotated, while *r* is the
    # number of bits to rotate it by.  Returns the rotated result.
    #
    # NOTE(review): with `r == 0` this evaluates `unsafe_shr(32)`, a
    # full-width shift whose result is undefined.  All rotate counts in
    # xxHash are nonzero, but confirm no caller passes 0.
    @[AlwaysInline]
    protected def self.rotl32(x : UInt32, r : Int) : UInt32
      # Remi: Maybe replace with the standard lib's UInt32#rotate_left method?
      x.unsafe_shl(r) | x.unsafe_shr(32 &- r)
    end

    # 64-bit rotate left.  *x* is the `UInt64` to be rotated, while *r* is the
    # number of bits to rotate it by.  Returns the rotated result.
    #
    # NOTE(review): as with `rotl32`, `r == 0` would produce an undefined
    # full-width `unsafe_shr(64)` — callers must pass 1..63.
    @[AlwaysInline]
    protected def self.rotl64(x : UInt64, r : Int) : UInt64
      # Remi: Maybe replace with the standard lib's UInt64#rotate_left method?
      x.unsafe_shl(r) | x.unsafe_shr(64 &- r)
    end

    # XORs *v64* with itself shifted right by *shift* bits, a standard
    # avalanche primitive in the xxHash finalization steps.
    @[AlwaysInline]
    protected def self.xorShift64(v64 : UInt64, shift : Int) : UInt64
      v64 ^ v64.unsafe_shr(shift)
    end

    # Reads an unaligned 64-bit little endian integer from *ptr*.
    #
    # NOTE(review): this reinterprets the pointer and dereferences it
    # directly, so it assumes a little-endian host (no byte swap is done) and
    # that unaligned loads are tolerated — confirm on non-x86 targets.
    @[AlwaysInline]
    protected def self.readLE64(ptr : Pointer(UInt8)) : UInt64
      ptr.unsafe_as(Pointer(UInt64)).value
    end

    # Reads an unaligned 32-bit little endian integer from *ptr*.
    #
    # NOTE(review): same little-endian/unaligned-load assumption as
    # `readLE64`.
    @[AlwaysInline]
    protected def self.readLE32(ptr : Pointer(UInt8)) : UInt32
      ptr.unsafe_as(Pointer(UInt32)).value
    end

    # Writes the 64-bit little endian integer *v64* to *dest*.
    #
    # NOTE(review): same little-endian/unaligned-store assumption as the
    # read helpers above.
    @[AlwaysInline]
    protected def self.writeLE64(dest : Pointer(UInt8), v64 : UInt64) : Nil
      dest.unsafe_as(Pointer(UInt64)).value = v64
    end

    # Calculates a 32-bit to 64-bit long multiply: multiplies the low 32 bits
    # of *x* by the low 32 bits of *y*, producing a full 64-bit product.
    @[AlwaysInline]
    protected def self.mult32To64(x : UInt64, y : UInt64) : UInt64
      x.to_u32!.to_u64! &* y.to_u32!.to_u64!
    end

    # Calculates a 64->128-bit long multiply.
    @[AlwaysInline]
    protected def self.mult64To128(lhs : UInt64, rhs : UInt64) : UInt128
      mult64To128Scaler(lhs, rhs)
    end

    # :ditto:
    #
    # The commented-out code below is the portable 32-bit-limb version from
    # the reference implementation; Crystal's native `UInt128` multiply
    # replaces it.
    @[AlwaysInline]
    protected def self.mult64To128Scaler(lhs : UInt64, rhs : UInt64) : UInt128
      lhs.to_u128! &* rhs.to_u128!
      # lolo : UInt64 = mult32To64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF)
      # hilo : UInt64 = mult32To64(lhs >> 32, rhs & 0xFFFFFFFF)
      # lohi : UInt64 = mult32To64(lhs & 0xFFFFFFFF, rhs >> 32)
      # hihi : UInt64 = mult32To64(lhs >> 32, rhs >> 32)
      # cross : UInt64 = (lolo >> 32) + (hilo & 0xFFFFFFFF) + lohi
      # upper : UInt64 = (hilo >> 32) + (cross >> 32) + hihi
      # lower : UInt64 = (cross << 32) | (lolo & 0xFFFFFFFF)

      # (upper.to_u128! << 64) | lower.to_u128!
    end

    # Calculates a 64-bit to 128-bit multiply, then XOR folds it.
    #
    # The reason for the separate function is to prevent passing too many
    # structs around by value. This will hopefully inline the multiply, but we
    # don't force it.
    @[AlwaysInline]
    protected def self.mult128Fold64(lhs : UInt64, rhs : UInt64) : UInt64
      product : UInt128 = mult64To128(lhs, rhs)
      # XOR the high and low 64-bit halves of the 128-bit product together.
      product.to_u64! ^ product.unsafe_shr(64).to_u64!
    end

    # Mixes 16 bytes of *input* against 16 bytes of *secret* (offset by
    # *seed*) into a single `UInt64` via a folded 128-bit multiply.
    #
    # DISCLAIMER: There are known *seed-dependent* multicollisions here due to
    # multiplication by zero, affecting hashes of lengths 17 to 240.
    #
    # However, they are very unlikely.
    #
    # Keep this in mind when using the unseeded XXH3 64-bit variant: As with all
    # unseeded non-cryptographic hashes, it does not attempt to defend itself
    # against specially crafted inputs, only random inputs.
    #
    # Compared to classic UMAC where a 1 in 2^31 chance of 4 consecutive bytes
    # cancelling out the secret is taken an arbitrary number of times (addressed
    # in #xxh3Accumulate512), this collision is very unlikely with random inputs
    # and/or proper seeding:
    #
    # This only has a 1 in 2^63 chance of 8 consecutive bytes cancelling out, in
    # a function that is only called up to 16 times per hash with up to 240
    # bytes of input.
    #
    # This is not too bad for a non-cryptographic hash function, especially with
    # only 64 bit outputs.
    #
    # The 128-bit variant (which trades some speed for strength) is NOT affected
    # by this, although it is always a good idea to use a proper seed if you
    # care about strength.
    @[AlwaysInline]
    protected def self.mix16B(input : PUInt8, secret : PUInt8, seed : UInt64) : UInt64
      inputLow = readLE64(input)
      inputHigh = readLE64(input + 8)
      # Note the asymmetric use of the seed: added to the first secret word,
      # subtracted from the second (matches the upstream reference).
      mult128Fold64(inputLow ^ (readLE64(secret) &+ seed),
                    inputHigh ^ (readLE64(secret + 8) &- seed))
    end
  end
end