Skip to content

Revert "Integer can be hashed rapidly as well" #58712

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 24 additions & 20 deletions base/gmp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -843,25 +843,24 @@ Base.deepcopy_internal(x::BigInt, stackdict::IdDict) = get!(() -> MPZ.set(x), st

## streamlined hashing for BigInt, by avoiding allocation from shifts ##

Base._hash_shl!(x::BigInt, n) = MPZ.mul_2exp!(x, n)

if Limb === UInt64 === UInt
# On 64-bit systems we can define optimized BigInt methods
# for hash_integer (used e.g. for Rational{BigInt})
# and for hash

using .Base: HASH_SECRET, hash_bytes, hash_finalizer
using .Base: hash_finalizer

function hash_integer(n::BigInt, h::UInt)
GC.@preserve n begin
s = n.size
h ⊻= (s < 0)
hash_bytes(
Ptr{UInt8}(n.d),
8 * abs(s),
h,
HASH_SECRET
)
s == 0 && return hash_integer(0, h)
p = convert(Ptr{UInt64}, n.d)
b = unsafe_load(p)
h ⊻= hash_finalizer(ifelse(s < 0, -b, b) ⊻ h)
for k = 2:abs(s)
h ⊻= hash_finalizer(unsafe_load(p, k) ⊻ h)
end
return h
end
end

Expand Down Expand Up @@ -893,16 +892,21 @@ if Limb === UInt64 === UInt
return hash(ldexp(flipsign(Float64(limb), sz), pow), h)
end
h = hash_integer(pow, h)

h ⊻= (sz < 0)
trailing_zero_bytes = div(pow, 8)
GC.@preserve x begin
h = hash_bytes(
Ptr{UInt8}(x.d) + 8 * trailing_zero_bytes,
8 * (asz - trailing_zero_bytes),
h,
HASH_SECRET
)
h ⊻= hash_finalizer(flipsign(limb, sz) ⊻ h)
for idx = idx+1:asz
if shift == 0
limb = unsafe_load(ptr, idx)
else
limb1 = limb2
if idx == asz
limb = limb1 >> shift
limb == 0 && break # don't hash leading zeros
else
limb2 = unsafe_load(ptr, idx+1)
limb = limb2 << upshift | limb1 >> shift
end
end
h ⊻= hash_finalizer(limb ⊻ h)
end
return h
end
Expand Down
80 changes: 10 additions & 70 deletions base/hashing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -69,72 +69,17 @@ hash(x::UInt64, h::UInt) = hash_uint64(hash_mix_linear(x, h))
hash(x::Int64, h::UInt) = hash(bitcast(UInt64, x), h)
hash(x::Union{Bool, Int8, UInt8, Int16, UInt16, Int32, UInt32}, h::UInt) = hash(Int64(x), h)

hash_integer(x::Integer, h::UInt) = _hash_integer(x, UInt64(h)) % UInt
function _hash_integer(
x::Integer,
seed::UInt64 = HASH_SEED,
secret::NTuple{3, UInt64} = HASH_SECRET
)
seed ⊻= (x < 0)
u = abs(x)

# always left-pad to a multiple of 8 bytes
buflen = UInt(cld(top_set_bit(u), 64) * 8)
seed = seed ⊻ (hash_mix(seed ⊻ secret[1], secret[2]) ⊻ buflen)

a = zero(UInt64)
b = zero(UInt64)

if buflen ≤ 16
a = (UInt64(u % UInt32) << 32) |
UInt64((u >>> ((buflen - 4) * 8)) % UInt32)

delta = (buflen & 24) >>> (buflen >>> 3)

b = (UInt64((u >>> (8 * delta)) % UInt32) << 32) |
UInt64((u >>> (8 * (buflen - 4 - delta))) % UInt32)
else
a = (u >>> 8(buflen - 16)) % UInt
b = (u >>> 8(buflen - 8)) % UInt

i = buflen
if i > 48
see1 = seed
see2 = seed
while i ≥ 48
l0 = u % UInt; u >>>= 64
l1 = u % UInt; u >>>= 64
l2 = u % UInt; u >>>= 64
l3 = u % UInt; u >>>= 64
l4 = u % UInt; u >>>= 64
l5 = u % UInt; u >>>= 64

seed = hash_mix(l0 ⊻ secret[1], l1 ⊻ seed)
see1 = hash_mix(l2 ⊻ secret[2], l3 ⊻ see1)
see2 = hash_mix(l4 ⊻ secret[3], l5 ⊻ see2)
end
seed = seed ⊻ see1 ⊻ see2
i -= 48
end
if i > 16
l0 = u % UInt; u >>>= 64
l1 = u % UInt; u >>>= 64
seed = hash_mix(l0 ⊻ secret[3], l1 ⊻ seed ⊻ secret[2])
if i > 32
l2 = u % UInt; u >>>= 64
l3 = u % UInt; u >>>= 64
seed = hash_mix(l2 ⊻ secret[3], l3 ⊻ seed)
end
end
function hash_integer(n::Integer, h::UInt)
h ⊻= hash_uint((n % UInt) ⊻ h)
n = abs(n)
n >>>= sizeof(UInt) << 3
while n != 0
h ⊻= hash_uint((n % UInt) ⊻ h)
n >>>= sizeof(UInt) << 3
end

a = a ⊻ secret[2]
b = b ⊻ seed
b, a = mul_parts(a, b)
return hash_mix(a ⊻ secret[1] ⊻ buflen, b ⊻ secret[2])
return h
end


## efficient value-based hashing of floats ##

const hx_NaN = hash(reinterpret(UInt64, NaN))
Expand Down Expand Up @@ -172,7 +117,6 @@ function hash(x::Float16, h::UInt)
end

## generic hashing for rational values ##
_hash_shl!(x, n) = (x << n)
function hash(x::Real, h::UInt)
# decompose x as num*2^pow/den
num, pow, den = decompose(x)
Expand All @@ -188,7 +132,6 @@ function hash(x::Real, h::UInt)
den = -den
end
num_z = trailing_zeros(num)

num >>= num_z
den_z = trailing_zeros(den)
den >>= den_z
Expand All @@ -213,10 +156,7 @@ function hash(x::Real, h::UInt)
end
# handle generic rational values
h = hash_integer(pow, h)

# trimming only whole bytes of trailing zeros greatly simplifies
# some specializations for memory-backed bitintegers
h = hash_integer((pow > 0) ? _hash_shl!(num, pow % 8) : num, h)
h = hash_integer(num, h)
return h
end

Expand Down Expand Up @@ -269,7 +209,7 @@ end
else
pos = 1
i = buflen
if i > 48
while i 48
see1 = seed
see2 = seed
while i ≥ 48
Expand Down
2 changes: 1 addition & 1 deletion base/irrationals.jl
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ isinteger(::AbstractIrrational) = false
iszero(::AbstractIrrational) = false
isone(::AbstractIrrational) = false

hash(x::Irrational, h::UInt) = 3h - objectid(x)
hash(x::Irrational, h::UInt) = 3*objectid(x) - h

widen(::Type{T}) where {T<:Irrational} = T

Expand Down
2 changes: 1 addition & 1 deletion base/rational.jl
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@ function hash(x::Rational{<:BitInteger64}, h::UInt)
end
end
h = hash_integer(pow, h)
h = hash_integer((pow > 0) ? (num << (pow % 64)) : num, h)
h = hash_integer(num, h)
return h
end

Expand Down
12 changes: 3 additions & 9 deletions test/gmp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -811,14 +811,8 @@ end

@testset "hashing" begin
for i in 1:10:100
for shift in 0:3
bint = big(11)^i << shift
bfloat = float(bint)
@test (hash(bint) == hash(bfloat)) == (bint == bfloat)
@test hash(bint, Base.HASH_SEED) ==
@invoke(hash(bint::Real, Base.HASH_SEED))
@test Base.hash_integer(bint, Base.HASH_SEED) ==
@invoke(Base.hash_integer(bint::Integer, Base.HASH_SEED))
end
bint = big(11)^i
bfloat = big(11.0)^i
@test (hash(bint) == hash(bfloat)) == (bint == bfloat)
end
end