Skip to content

Commit 6aa9a35

Browse files
committed
Implement faster thread local rng for the scheduler.
1 parent 5230d27 commit 6aa9a35

File tree

1 file changed

+50
-1
lines changed

1 file changed

+50
-1
lines changed

base/partr.jl

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,56 @@ const heaps = [Vector{taskheap}(undef, 0), Vector{taskheap}(undef, 0)]
2020
# Two SpinLocks, the same count as the two entries of `heaps`.
# NOTE(review): presumably each lock guards the matching entry of `heaps` — confirm against callers.
const heaps_lock = [SpinLock(), SpinLock()]
2121

2222

23-
# Previous definition, shown on the removed (`-`) side of this diff: it returns 0 when
# `max` is zero, otherwise calls the C symbol `jl_rand_ptls` via `ccall` and adds 1.
cong(max::UInt32) = iszero(max) ? UInt32(0) : ccall(:jl_rand_ptls, UInt32, (UInt32,), max) + UInt32(1)
23+
"""
24+
cong(max::UInt32)
25+
Return a random UInt32 in the range `1:max`, except if `max` is 0, in which case return 0.
26+
"""
27+
function cong(max::UInt32)
    # TODO: make sure users don't use 0 and remove this check
    if iszero(max)
        return UInt32(0)
    end
    # Shift the zero-based draw up by one.
    return jl_rand_ptls(max) + UInt32(1)
end
28+
29+
"""
30+
jl_rand_ptls(max::UInt32)
31+
Return a random UInt32 in the range `0:max-1` using the thread-local RNG
32+
state. Max must be greater than 0.
33+
"""
34+
Base.@assume_effects :removable :inaccessiblememonly :notaskstate function jl_rand_ptls(max::UInt32)
    # Are these effects correct? We are technically lying to the compiler,
    # though these are the same lies we tell to say that an unescaped allocation has no effects.

    # Reinterpret the pointer returned by `Core.getptls()` as a pointer to UInt64 words.
    ptls = Base.unsafe_convert(Ptr{UInt64}, Core.getptls())
    # The current seed is read from the second UInt64 word (1-based index 2).
    # TODO: What's the best way to do this for 32bit.
    rngseed = Base.unsafe_load(ptls, 2)
    # Draw a value and obtain the seed to use for the next draw.
    val, seed = rand_uniform_max_int32(max, rngseed)
    # Write the new seed back into the same slot it was read from.
    Base.unsafe_store!(ptls, seed, 2)
    return val % UInt32
end
43+
44+
# This implementation is based on OpenSSL's implementation of rand_uniform
45+
# https://github.com/openssl/openssl/blob/1d2cbd9b5a126189d5e9bc78a3bdb9709427d02b/crypto/rand/rand_uniform.c#L13-L99
46+
# Comments are vendored from their implementation as well.
47+
# For the original developer, see the PR to Swift: https://github.com/apple/swift/pull/39143
48+
49+
# Essentially it boils down to incrementally generating a fixed point
50+
# number on the interval [0, 1) and multiplying this number by the upper
51+
# range limit. Once it is certain what the fractional part contributes to
52+
# the integral part of the product, the algorithm has produced a definitive
53+
# result.
54+
"""
55+
rand_uniform_max_int32(max::UInt32, seed::UInt64)
56+
Return a tuple `(value, seed)`: a random UInt32 in the range `0:max-1` computed from the given seed, and the seed to pass to the next call.
57+
Max must be greater than 0.
58+
"""
59+
Base.@assume_effects :total function rand_uniform_max_int32(max::UInt32, seed::UInt64)
    # Returns the tuple `(value, seed)`: the drawn UInt32 and the seed for the next draw.

    # With `max == 1` the only possible result is 0; the seed is returned unchanged.
    if max == UInt32(1)
        return UInt32(0), seed
    end
    # We are generating a fixed point number on the interval [0, 1).
    # Multiplying this by the range gives us a number on [0, upper).
    # The high word of the multiplication result represents the integral part.
    # This is not completely unbiased as it's missing the fractional part of the
    # original implementation, but it's good enough for our purposes.

    # Advance the seed; UInt64 arithmetic wraps around on overflow.
    seed = UInt64(69069) * seed + UInt64(362437)
    # 64 bit product of `max` and the low 32 bits of the new seed.
    prod = UInt64(max) * UInt64(seed % UInt32)
    # Integral part: the high 32 bits of the product (parenthesised so the
    # shift-then-truncate order is explicit).
    i = (prod >> 32) % UInt32
    return i, seed
end
72+
2473

2574

2675
function multiq_sift_up(heap::taskheap, idx::Int32)

0 commit comments

Comments
 (0)