From e9cfc26da1ecd8e93b7db6f8350c745bdfba0919 Mon Sep 17 00:00:00 2001 From: Rafael Fourquet Date: Wed, 19 Nov 2014 13:44:49 +0530 Subject: [PATCH] refactor RandIntGen API This is a rewrite of part of #8309 and #8649. The goal is to be able to implement the BigInt case more efficiently. --- base/random.jl | 120 +++++++++++++++++++++++-------------------------- test/random.jl | 8 ++-- 2 files changed, 60 insertions(+), 68 deletions(-) diff --git a/base/random.jl b/base/random.jl index af916e706005e..31a5f8996430b 100644 --- a/base/random.jl +++ b/base/random.jl @@ -1,6 +1,6 @@ module Random -using Base.dSFMT +using Base: dSFMT, IntTypes export srand, rand, rand!, @@ -334,90 +334,82 @@ function rand!{T<:Union(Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64 A end -## Generate random integer within a range +## Pick values randomly from an AbstractArray -# remainder function according to Knuth, where rem_knuth(a, 0) = a -rem_knuth(a::UInt, b::UInt) = a % (b + (b == 0)) + a * (b == 0) -rem_knuth{T<:Unsigned}(a::T, b::T) = b != 0 ? a % b : a +# maxmultiple: precondition: k>0 -# maximum multiple of k <= 2^bits(T) decremented by one, -# that is 0xFFFFFFFF if k = typemax(T) - typemin(T) with intentional underflow -maxmultiple(k::UInt32) = (div(0x0000000100000000,k + (k == 0))*k - 1) % UInt32 -maxmultiple(k::UInt64) = (div(0x00000000000000010000000000000000, k + (k == 0))*k - 1) % UInt64 -# maximum multiple of k within 1:typemax(UInt128) -maxmultiple(k::UInt128) = div(typemax(UInt128), k + (k == 0))*k - 1 -# maximum multiple of k within 1:2^32 or 1:2^64, depending on size -maxmultiplemix(k::UInt64) = (div((k >> 32 != 0)*0x0000000000000000FFFFFFFF00000000 + 0x0000000100000000, k + (k == 0))*k - 1) % UInt64 +# maximum multiple of k <= typemax(k)+1, decremented by one +maxmultiple(k::UInt32) = (div(0x0000000100000000, k)*k - 1) % UInt32 -immutable RandIntGen{T<:Integer, U<:Unsigned} - a::T # first element of the range +maxmultiple_64(k::UInt64) = (div(0x00000000000000010000000000000000, k)*k - 1) % UInt64 +# 32 or 64 bits version depending on size +maxmultiple(k::UInt64) = k >> 32 == 0 ? maxmultiple(k % UInt32) % UInt64 : maxmultiple_64(k) + +# maximum multiple of k <= typemax(UInt128), decremented by one +maxmultiple(k::UInt128) = div(typemax(UInt128), k)*k - 1 + +immutable RandIntGen{T<:Integer, U<:Union(UInt32, UInt64, UInt128)} + # T is the output type k::U # range length or zero for full range u::U # rejection threshold end -# generators with 32, 128 bits entropy -RandIntGen{T, U<:Union(UInt32, UInt128)}(a::T, k::U) = RandIntGen{T, U}(a, k, maxmultiple(k)) -# mixed 32/64 bits entropy generator -RandIntGen{T}(a::T, k::UInt64) = RandIntGen{T,UInt64}(a, k, maxmultiplemix(k)) - -# generator for ranges -RandIntGen{T<:Unsigned}(r::UnitRange{T}) = isempty(r) ? error("range must be non-empty") : RandIntGen(first(r), last(r) - first(r) + one(T)) +@inline RandIntGen{T, U}(::Type{T}, k::U) = RandIntGen{T,U}(k, k == 0 ? typemax(U) : maxmultiple(k)) -# specialized versions -for (T, U) in [(UInt8, UInt32), (UInt16, UInt32), - (Int8, UInt32), (Int16, UInt32), (Int32, UInt32), (Int64, UInt64), (Int128, UInt128), - (Bool, UInt32)] - - @eval RandIntGen(r::UnitRange{$T}) = isempty(r) ? error("range must be non-empty") : RandIntGen(first(r), convert($U, unsigned(last(r) - first(r)) + one($U))) # overflow ok +@inline function rand_lessthan{U}(mt::MersenneTwister, u::U) + while true + x = rand(mt, U) + x <= u && return x + end end # this function uses 32 bit entropy for small ranges of length <= typemax(UInt32) + 1 # RandIntGen is responsible for providing the right value of k -function rand{T<:Union(UInt64, Int64)}(mt::MersenneTwister, g::RandIntGen{T,UInt64}) - local x::UInt64 - if (g.k - 1) >> 32 == 0 - x = rand(mt, UInt32) - while x > g.u - x = rand(mt, UInt32) - end - else - x = rand(mt, UInt64) - while x > g.u - x = rand(mt, UInt64) - end - end - return reinterpret(T, reinterpret(UInt64, g.a) + rem_knuth(x, g.k)) +function rand{T}(mt::MersenneTwister, g::RandIntGen{T, UInt64}) + g.k == 0 && return rand(mt, UInt64) % T + x = (g.k - 1) >> 32 == 0 ? + rand_lessthan(mt, g.u % UInt32) % UInt64 : + rand_lessthan(mt, g.u) + return x % g.k % T end -function rand{T<:Integer, U<:Unsigned}(mt::MersenneTwister, g::RandIntGen{T,U}) - x = rand(mt, U) - while x > g.u - x = rand(mt, U) - end - (unsigned(g.a) + rem_knuth(x, g.k)) % T -end - -rand{T<:Union(Signed,Unsigned,Bool,Char)}(mt::MersenneTwister, r::UnitRange{T}) = rand(mt, RandIntGen(r)) +rand{T, U}(mt::MersenneTwister, g::RandIntGen{T, U}) = (g.k == 0 ? rand(mt, U) : rand_lessthan(mt, g.u) % g.k) % T + +# generator API for ranges: +# inrange(k) returns a helper object for generating uniformly random integers in the range 0:k-1 +# +# note: If k==0 is unsigned, then k-1==typemax(k), this is "full range". +# note: The interval 0:k-1 is the one closest to the machine. It would be natural to return +# instead a random element from 1:k, to index into an AbstractArray, but as UnitRange has a +# specialization which needs to get an element from a:a+k-1, the addition is left to the user +# of inrange. The value of a could be stored in RandIntGen, but in the case of BigInt, this +# would entail an otherwise unnecessary BigInt allocation (`one(Int)+b` is cheaper than +# `one(BigInt)+b` for a BigInt `b`). + +@inline inrange{T<:Union(UInt32,UInt64,UInt128,Int32,Int64,Int128)}(k::T) = RandIntGen(T, unsigned(k)) +@inline inrange{T<:Union(Int8,UInt8,Int16,UInt16)}(k::T) = RandIntGen(T, k % UInt32) + +# wrappers: +# general case: +rangelength(r::AbstractArray) = (isempty(r) && error("collection must be non-empty"); length(r)) +rangeindex(r::AbstractArray, n) = @inbounds return r[1+n] +# note: RandIntGen{T} needs to keep track of the type T of `length(r)` so that `n` above can be +# of type T; otherwise, n would be an unsigned, which causes problems in some cases (e.g. Rational). + +# UnitRange: specialization so that it works with ranges whose length and getindex overflow +rangelength{T<:Union(Bool,IntTypes...)}(r::UnitRange{T}) = (isempty(r) && error("range must be non-empty"); last(r)-first(r)+one(T)) +rangeindex{ T<:Union(Bool,IntTypes...)}(r::UnitRange{T}, n) = (first(r) + n) % T # Randomly draw a sample from an AbstractArray r # (e.g. r is a range 0:2:8 or a vector [2, 3, 5, 7]) -rand(mt::MersenneTwister, r::AbstractArray) = @inbounds return r[rand(mt, 1:length(r))] - -function rand!(mt::MersenneTwister, A::AbstractArray, g::RandIntGen) - for i = 1 : length(A) - @inbounds A[i] = rand(mt, g) - end - return A -end - -rand!{T<:Union(Signed,Unsigned,Bool,Char)}(mt::MersenneTwister, A::AbstractArray, r::UnitRange{T}) = rand!(mt, A, RandIntGen(r)) +rand(mt::MersenneTwister, r::AbstractArray) = rangeindex(r, rand(mt, inrange(rangelength(r)))) function rand!(mt::MersenneTwister, A::AbstractArray, r::AbstractArray) - g = RandIntGen(1:(length(r))) + g = inrange(rangelength(r)) for i = 1 : length(A) - @inbounds A[i] = r[rand(mt, g)] + @inbounds A[i] = rangeindex(r, rand(mt, g)) end - return A + A end rand{T}(mt::MersenneTwister, r::AbstractArray{T}, dims::Dims) = rand!(mt, Array(T, dims), r) diff --git a/test/random.jl b/test/random.jl index 0e5b7105f7501..60de18e9c4d28 100644 --- a/test/random.jl +++ b/test/random.jl @@ -75,8 +75,8 @@ if sizeof(Int32) < sizeof(Int) r = rand(int32(-1):typemax(Int32)) @test typeof(r) == Int32 @test -1 <= r <= typemax(Int32) - @test all([div(0x00010000000000000000,k)*k - 1 == Base.Random.RandIntGen(uint64(1:k)).u for k in 13 .+ int64(2).^(32:62)]) - @test all([div(0x00010000000000000000,k)*k - 1 == Base.Random.RandIntGen(int64(1:k)).u for k in 13 .+ int64(2).^(32:61)]) + @test all([div(0x00010000000000000000,k)*k - 1 == Base.Random.inrange(uint64(k)).u for k in 13 .+ int64(2).^(32:62)]) + @test all([div(0x00010000000000000000,k)*k - 1 == Base.Random.inrange(int64(k)).u for k in 13 .+ int64(2).^(32:61)]) end @@ -181,8 +181,8 @@ r = uint64(rand(uint32(97:122))) srand(seed) @test r == rand(uint64(97:122)) -@test all([div(0x000100000000,k)*k - 1 == Base.Random.RandIntGen(uint64(1:k)).u for k in 13 .+ int64(2).^(1:30)]) -@test all([div(0x000100000000,k)*k - 1 == Base.Random.RandIntGen(int64(1:k)).u for k in 13 .+ int64(2).^(1:30)]) +@test all([div(0x000100000000,k)*k - 1 == Base.Random.inrange(uint64(k)).u for k in 13 .+ int64(2).^(1:30)]) +@test all([div(0x000100000000,k)*k - 1 == Base.Random.inrange(int64(k)).u for k in 13 .+ int64(2).^(1:30)]) import Base.Random: uuid4, UUID