From e9cfc26da1ecd8e93b7db6f8350c745bdfba0919 Mon Sep 17 00:00:00 2001
From: Rafael Fourquet <fourquet.rafael@gmail.com>
Date: Wed, 19 Nov 2014 13:44:49 +0530
Subject: [PATCH] refactor RandIntGen API

This is a rewrite of part of #8309 and #8649.
The goal is to be able to implement the BigInt case more efficiently.
---
 base/random.jl | 120 +++++++++++++++++++++++--------------------------
 test/random.jl |   8 ++--
 2 files changed, 60 insertions(+), 68 deletions(-)

diff --git a/base/random.jl b/base/random.jl
index af916e706005e..31a5f8996430b 100644
--- a/base/random.jl
+++ b/base/random.jl
@@ -1,6 +1,6 @@
 module Random
 
-using Base.dSFMT
+using Base: dSFMT, IntTypes
 
 export srand,
        rand, rand!,
@@ -334,90 +334,82 @@ function rand!{T<:Union(Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64
     A
 end
 
-## Generate random integer within a range
+## Pick values randomly from an AbstractArray
 
-# remainder function according to Knuth, where rem_knuth(a, 0) = a
-rem_knuth(a::UInt, b::UInt) = a % (b + (b == 0)) + a * (b == 0)
-rem_knuth{T<:Unsigned}(a::T, b::T) = b != 0 ? a % b : a
+# maxmultiple: precondition: k>0
 
-# maximum multiple of k <= 2^bits(T) decremented by one,
-# that is 0xFFFFFFFF if k = typemax(T) - typemin(T) with intentional underflow
-maxmultiple(k::UInt32) = (div(0x0000000100000000,k + (k == 0))*k - 1) % UInt32
-maxmultiple(k::UInt64) = (div(0x00000000000000010000000000000000, k + (k == 0))*k - 1) % UInt64
-# maximum multiple of k within 1:typemax(UInt128)
-maxmultiple(k::UInt128) = div(typemax(UInt128), k + (k == 0))*k - 1
-# maximum multiple of k within 1:2^32 or 1:2^64, depending on size
-maxmultiplemix(k::UInt64) = (div((k >> 32 != 0)*0x0000000000000000FFFFFFFF00000000 + 0x0000000100000000, k + (k == 0))*k - 1) % UInt64
+# maximum multiple of k <= typemax(k)+1, decremented by one
+maxmultiple(k::UInt32) = (div(0x0000000100000000, k)*k - 1) % UInt32
 
-immutable RandIntGen{T<:Integer, U<:Unsigned}
-    a::T   # first element of the range
+maxmultiple_64(k::UInt64) = (div(0x00000000000000010000000000000000, k)*k - 1) % UInt64
+# 32 or 64 bits version depending on size
+maxmultiple(k::UInt64) = k >> 32 == 0 ? maxmultiple(k % UInt32) % UInt64 : maxmultiple_64(k)
+
+# maximum multiple of k <= typemax(UInt128), decremented by one
+maxmultiple(k::UInt128) = div(typemax(UInt128), k)*k - 1
+
+immutable RandIntGen{T<:Integer, U<:Union(UInt32, UInt64, UInt128)}
+    # T is the output type
     k::U   # range length or zero for full range
     u::U   # rejection threshold
 end
-# generators with 32, 128 bits entropy
-RandIntGen{T, U<:Union(UInt32, UInt128)}(a::T, k::U) = RandIntGen{T, U}(a, k, maxmultiple(k))
-# mixed 32/64 bits entropy generator
-RandIntGen{T}(a::T, k::UInt64) = RandIntGen{T,UInt64}(a, k, maxmultiplemix(k))
-
 
-# generator for ranges
-RandIntGen{T<:Unsigned}(r::UnitRange{T}) = isempty(r) ? error("range must be non-empty") : RandIntGen(first(r), last(r) - first(r) + one(T))
+@inline RandIntGen{T, U}(::Type{T}, k::U) = RandIntGen{T,U}(k, k == 0 ? typemax(U) : maxmultiple(k))
 
-# specialized versions
-for (T, U) in [(UInt8, UInt32), (UInt16, UInt32),
-               (Int8, UInt32), (Int16, UInt32), (Int32, UInt32), (Int64, UInt64), (Int128, UInt128),
-               (Bool, UInt32)]
-
-    @eval RandIntGen(r::UnitRange{$T}) = isempty(r) ? error("range must be non-empty") : RandIntGen(first(r), convert($U, unsigned(last(r) - first(r)) + one($U))) # overflow ok
+@inline function rand_lessthan{U}(mt::MersenneTwister, u::U)
+    while true
+        x = rand(mt, U)
+        x <= u && return x
+    end
 end
 
 # this function uses 32 bit entropy for small ranges of length <= typemax(UInt32) + 1
 # RandIntGen is responsible for providing the right value of k
-function rand{T<:Union(UInt64, Int64)}(mt::MersenneTwister, g::RandIntGen{T,UInt64})
-    local x::UInt64
-    if (g.k - 1) >> 32 == 0
-        x = rand(mt, UInt32)
-        while x > g.u
-            x = rand(mt, UInt32)
-        end
-    else
-        x = rand(mt, UInt64)
-        while x > g.u
-            x = rand(mt, UInt64)
-        end
-    end
-    return reinterpret(T, reinterpret(UInt64, g.a) + rem_knuth(x, g.k))
+function rand{T}(mt::MersenneTwister, g::RandIntGen{T, UInt64})
+    g.k == 0 && return rand(mt, UInt64) % T
+    x = (g.k - 1) >> 32 == 0 ?
+            rand_lessthan(mt, g.u % UInt32) % UInt64 :
+            rand_lessthan(mt, g.u)
+    return x % g.k % T
 end
 
-function rand{T<:Integer, U<:Unsigned}(mt::MersenneTwister, g::RandIntGen{T,U})
-    x = rand(mt, U)
-    while x > g.u
-        x = rand(mt, U)
-    end
-    (unsigned(g.a) + rem_knuth(x, g.k)) % T
-end
-
-rand{T<:Union(Signed,Unsigned,Bool,Char)}(mt::MersenneTwister, r::UnitRange{T}) = rand(mt, RandIntGen(r))
+rand{T, U}(mt::MersenneTwister, g::RandIntGen{T, U}) = (g.k == 0 ? rand(mt, U) : rand_lessthan(mt, g.u) % g.k) % T
+
+# generator API for ranges:
+# inrange(k) returns a helper object for generating uniformly random integers in the range 0:k-1
+#
+# note: If k==0 is unsigned, then k-1==typemax(k), this is "full range".
+# note: The interval 0:k-1 is the one closest to the machine. It would be natural to return
+# instead a random element from 1:k, to index into an AbstractArray, but as UnitRange has a
+# specialization which needs to get an element from a:a+k-1, the addition is left to the user
+# of inrange. The value of a could be stored in RandIntGen, but in the case of BigInt, this
+# would entail an otherwise unnecessary BigInt allocation (`one(Int)+b` is cheaper than
+# `one(BigInt)+b` for a BigInt `b`).
+
+@inline inrange{T<:Union(UInt32,UInt64,UInt128,Int32,Int64,Int128)}(k::T) = RandIntGen(T, unsigned(k))
+@inline inrange{T<:Union(Int8,UInt8,Int16,UInt16)}(k::T) = RandIntGen(T, k % UInt32)
+
+# wrappers:
+# general case:
+rangelength(r::AbstractArray) = (isempty(r) && error("collection must be non-empty"); length(r))
+rangeindex(r::AbstractArray, n) = @inbounds return r[1+n]
+# note: RandIntGen{T} needs to keep track of the type T of `length(r)` so that `n` above can be
+# of type T; otherwise, n would be an unsigned, which causes problems in some cases (e.g. Rational).
+
+# UnitRange: specialization so that it works with ranges whose length and getindex overflow
+rangelength{T<:Union(Bool,IntTypes...)}(r::UnitRange{T}) = (isempty(r) && error("range must be non-empty"); last(r)-first(r)+one(T))
+rangeindex{ T<:Union(Bool,IntTypes...)}(r::UnitRange{T}, n) = (first(r) + n) % T
 
 # Randomly draw a sample from an AbstractArray r
 # (e.g. r is a range 0:2:8 or a vector [2, 3, 5, 7])
-rand(mt::MersenneTwister, r::AbstractArray) = @inbounds return r[rand(mt, 1:length(r))]
-
-function rand!(mt::MersenneTwister, A::AbstractArray, g::RandIntGen)
-    for i = 1 : length(A)
-        @inbounds A[i] = rand(mt, g)
-    end
-    return A
-end
-
-rand!{T<:Union(Signed,Unsigned,Bool,Char)}(mt::MersenneTwister, A::AbstractArray, r::UnitRange{T}) = rand!(mt, A, RandIntGen(r))
+rand(mt::MersenneTwister, r::AbstractArray) = rangeindex(r, rand(mt, inrange(rangelength(r))))
 
 function rand!(mt::MersenneTwister, A::AbstractArray, r::AbstractArray)
-    g = RandIntGen(1:(length(r)))
+    g = inrange(rangelength(r))
     for i = 1 : length(A)
-        @inbounds A[i] = r[rand(mt, g)]
+        @inbounds A[i] = rangeindex(r, rand(mt, g))
     end
-    return A
+    A
 end
 
 rand{T}(mt::MersenneTwister, r::AbstractArray{T}, dims::Dims) = rand!(mt, Array(T, dims), r)
diff --git a/test/random.jl b/test/random.jl
index 0e5b7105f7501..60de18e9c4d28 100644
--- a/test/random.jl
+++ b/test/random.jl
@@ -75,8 +75,8 @@ if sizeof(Int32) < sizeof(Int)
     r = rand(int32(-1):typemax(Int32))
     @test typeof(r) == Int32
     @test -1 <= r <= typemax(Int32)
-    @test all([div(0x00010000000000000000,k)*k - 1 == Base.Random.RandIntGen(uint64(1:k)).u for k in 13 .+ int64(2).^(32:62)])
-    @test all([div(0x00010000000000000000,k)*k - 1 == Base.Random.RandIntGen(int64(1:k)).u for k in 13 .+ int64(2).^(32:61)])
+    @test all([div(0x00010000000000000000,k)*k - 1 == Base.Random.inrange(uint64(k)).u for k in 13 .+ int64(2).^(32:62)])
+    @test all([div(0x00010000000000000000,k)*k - 1 == Base.Random.inrange(int64(k)).u for k in 13 .+ int64(2).^(32:61)])
 
 end
 
@@ -181,8 +181,8 @@ r = uint64(rand(uint32(97:122)))
 srand(seed)
 @test r == rand(uint64(97:122))
 
-@test all([div(0x000100000000,k)*k - 1 == Base.Random.RandIntGen(uint64(1:k)).u for k in 13 .+ int64(2).^(1:30)])
-@test all([div(0x000100000000,k)*k - 1 == Base.Random.RandIntGen(int64(1:k)).u for k in 13 .+ int64(2).^(1:30)])
+@test all([div(0x000100000000,k)*k - 1 == Base.Random.inrange(uint64(k)).u for k in 13 .+ int64(2).^(1:30)])
+@test all([div(0x000100000000,k)*k - 1 == Base.Random.inrange(int64(k)).u for k in 13 .+ int64(2).^(1:30)])
 
 import Base.Random: uuid4, UUID