Skip to content

Commit

Permalink
Random: allow string seeds (#51527)
Browse files Browse the repository at this point in the history
We used to be able to seed RNGs with a string, but that string was
interpreted as the filename containing the actual seed. This was
deprecated in #21359, in order to later allow using a string seed
directly, which this patch does.

---------

Co-authored-by: Nathan Zimmerberg <39104088+nhz2@users.noreply.github.com>
  • Loading branch information
rfourquet and nhz2 authored Oct 10, 2023
1 parent a857a86 commit f82f0d4
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 10 deletions.
5 changes: 3 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,10 @@ Standard library changes

#### Random
* `rand` now supports sampling over `Tuple` types ([#35856], [#50251]).
* When seeding RNGs provided by `Random`, negative integer seeds can now be used ([#51416]).

* `rand` now supports sampling over `Pair` types ([#28705]).
* When seeding RNGs provided by `Random`, negative integer seeds can now be used ([#51416]).
* Seedable random number generators from `Random` can now be seeded by a string, e.g.
`seed!(rng, "a random seed")` ([#51527]).

#### REPL

Expand Down
30 changes: 25 additions & 5 deletions stdlib/Random/src/RNGs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ MersenneTwister(seed, state::DSFMT_state) =
Create a `MersenneTwister` RNG object. Different RNG objects can have
their own seeds, which may be useful for generating different streams
of random numbers.
The `seed` may be an integer or a vector of `UInt32` integers.
The `seed` may be an integer, a string, or a vector of `UInt32` integers.
If no seed is provided, a randomly generated one is created (using entropy from the system).
See the [`seed!`](@ref) function for reseeding an already existing `MersenneTwister` object.
Expand Down Expand Up @@ -316,12 +316,32 @@ function hash_seed(seed::Union{AbstractArray{UInt32}, AbstractArray{UInt64}})
SHA.digest!(ctx)
end

function hash_seed(str::AbstractString)
    # Normalize to `String` first: equal strings of different concrete types then expose
    # the same code units, and therefore produce the same hash.
    bytes = codeunits(String(str))
    # Trailer appended after the string data. The other `hash_seed` methods hash a
    # multiple of 4 bytes followed by a 1-byte signature, so the string bytes are padded
    # up to the next multiple of 4 (always 1 to 4 padding bytes). Each padding byte holds
    # the number of padding bytes, which keeps strings of different lengths from hashing
    # the same data. The string signature byte 0x05 comes last.
    npad = 4 - mod(length(bytes), 4)
    trailer = fill(npad % UInt8, npad)
    push!(trailer, 0x05)

    ctx = SHA.SHA2_256_CTX()
    SHA.update!(ctx, bytes)
    SHA.update!(ctx, trailer)
    return SHA.digest!(ctx)
end


"""
hash_seed(seed) -> AbstractVector{UInt8}
Return a cryptographic hash of `seed` of size 256 bits (32 bytes).
`seed` can currently be of type `Union{Integer, DenseArray{UInt32}, DenseArray{UInt64}}`,
`seed` can currently be of type
`Union{Integer, AbstractString, AbstractArray{UInt32}, AbstractArray{UInt64}}`,
but modules can extend this function for types they own.
`hash_seed` is "injective": if `n != m`, then `hash_seed(n) != hash_seed(m)`.
Expand Down Expand Up @@ -750,13 +770,13 @@ jump!(r::MersenneTwister, steps::Integer) = copy!(r, jump(r, steps))
# 3, 4: .adv_vals, .idxF (counters to reconstruct the float cache, optional if 5-6 not shown)
# 5, 6: .adv_ints, .idxI (counters to reconstruct the integer cache, optional)

# Constructors taking a seed together with an `advance` tuple of counters.
# NOTE(review): each signature below appears twice, once with `seed` restricted to
# `Union{Integer,Vector{UInt32}}` and once unrestricted. This looks like the removed and
# added lines of a diff rendered without +/- markers; confirm which one is current.

# 6-tuple form: build `MersenneTwister(seed)` and pass the six counters to `advance!`.
Random.MersenneTwister(seed::Union{Integer,Vector{UInt32}}, advance::NTuple{6,Integer}) =
Random.MersenneTwister(seed, advance::NTuple{6,Integer}) =
advance!(MersenneTwister(seed), advance...)

# 4-tuple form: append two zeros and forward to the 6-tuple form.
Random.MersenneTwister(seed::Union{Integer,Vector{UInt32}}, advance::NTuple{4,Integer}) =
Random.MersenneTwister(seed, advance::NTuple{4,Integer}) =
MersenneTwister(seed, (advance..., 0, 0))

# 2-tuple form: append four zeros and forward to the 6-tuple form.
Random.MersenneTwister(seed::Union{Integer,Vector{UInt32}}, advance::NTuple{2,Integer}) =
Random.MersenneTwister(seed, advance::NTuple{2,Integer}) =
MersenneTwister(seed, (advance..., 0, 0, 0, 0))

# advances raw state (per fill_array!) of r by n steps (Float64 values)
Expand Down
2 changes: 1 addition & 1 deletion stdlib/Random/src/Xoshiro.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# Lots of implementation is shared with TaskLocalRNG

"""
Xoshiro(seed::Integer)
Xoshiro(seed::Union{Integer, AbstractString})
Xoshiro()
Xoshiro256++ is a fast pseudorandom number generator described by David Blackman and
Expand Down
25 changes: 23 additions & 2 deletions stdlib/Random/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,7 @@ end
# test that the following is not an error (#16925)
@test Random.seed!(m..., typemax(UInt)) === m2
@test Random.seed!(m..., typemax(UInt128)) === m2
@test Random.seed!(m..., "a random seed") === m2
end
end

Expand Down Expand Up @@ -710,7 +711,7 @@ end
end

@testset "$RNG(seed) & Random.seed!(m::$RNG, seed) produce the same stream" for RNG=(MersenneTwister,Xoshiro)
seeds = Any[0, 1, 2, 10000, 10001, rand(UInt32, 8), rand(UInt128, 3)...]
seeds = Any[0, 1, 2, 10000, 10001, rand(UInt32, 8), randstring(), randstring(), rand(UInt128, 3)...]
if RNG == Xoshiro
push!(seeds, rand(UInt64, rand(1:4)))
end
Expand All @@ -723,7 +724,7 @@ end
end

@testset "Random.seed!(seed) sets Random.GLOBAL_SEED" begin
seeds = Any[0, rand(UInt128), rand(UInt64, 4)]
seeds = Any[0, rand(UInt128), rand(UInt64, 4), randstring(20)]

for seed=seeds
Random.seed!(seed)
Expand Down Expand Up @@ -940,6 +941,15 @@ end
@test string(m) == "MersenneTwister(-3)"
Random.seed!(m, typemin(Int8))
@test string(m) == "MersenneTwister(-128)"

# string seeds
Random.seed!(m, "seed 1")
@test string(m) == "MersenneTwister(\"seed 1\")"
x = rand(m)
@test x == rand(MersenneTwister("seed 1"))
@test string(m) == """MersenneTwister("seed 1", (0, 1002, 0, 1))"""
# test that MersenneTwister's fancy constructors accept string seeds
@test MersenneTwister("seed 1", (0, 1002, 0, 1)) == m
end

@testset "RandomDevice" begin
Expand Down Expand Up @@ -1196,6 +1206,17 @@ end
hash32 = Random.hash_seed(seed32)
# widening the elements to UInt64 must not change the hash
@test Random.hash_seed(map(UInt64, seed32)) == hash32
# the hash must not collide with any key already stored in `vseeds`
# (`vseeds` is built earlier in this testset; presumably keyed by seed hash)
@test hash32 ∉ keys(vseeds)

# equal strings of different string types must hash identically
seed_str = randstring()
seed_gstr = GenericString(seed_str)
@test Random.hash_seed(seed_str) == Random.hash_seed(seed_gstr)
# distinct one-character strings must hash to values that differ from each other
# and from every key in `vseeds`
string_seeds = Set{Vector{UInt8}}()
for ch = 'A':'z'
    vseed = Random.hash_seed(string(ch))
    @test vseed ∉ keys(vseeds)
    @test vseed ∉ string_seeds
    push!(string_seeds, vseed)
end
end

@testset "rand(::Type{<:Pair})" begin
Expand Down

0 comments on commit f82f0d4

Please sign in to comment.