Skip to content

Commit 5544f1a

Browse files
committed
Base: make constructing a float from a rational exact
Implementation approach: 1. Convert the (numerator, denominator) pair to a (sign bit, integral significand, exponent) triplet using integer arithmetic. The integer type in question must be wide enough. 2. Convert the above triplet into an instance of the chosen FP type. There is special support for IEEE 754 floating-point and for `BigFloat`, otherwise a fallback using `ldexp` is used. As a bonus, constructing a `BigFloat` from a `Rational` should now be thread-safe when the rounding mode and precision are provided to the constructor, because there is no access to the global precision or rounding mode settings. Updates #45213 Updates #50940 Updates #52507 Fixes #52394 Closes #52395 Fixes #52859
1 parent 46e6f23 commit 5544f1a

File tree

7 files changed

+749
-11
lines changed

7 files changed

+749
-11
lines changed

base/Base.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,7 @@ include("rounding.jl")
253253
include("div.jl")
254254
include("rawbigints.jl")
255255
include("float.jl")
256+
include("rational_to_float.jl")
256257
include("twiceprecision.jl")
257258
include("complex.jl")
258259
include("rational.jl")

base/mpfr.jl

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ import
1919
isone, big, _string_n, decompose, minmax,
2020
sinpi, cospi, sincospi, tanpi, sind, cosd, tand, asind, acosd, atand,
2121
uinttype, exponent_max, exponent_min, ieee754_representation, significand_mask,
22-
RawBigIntRoundingIncrementHelper, truncated, RawBigInt
22+
RawBigIntRoundingIncrementHelper, truncated, RawBigInt, unsafe_rational,
23+
RationalToFloat, rational_to_floating_point
2324

2425

2526
using .Base.Libc
@@ -310,12 +311,51 @@ BigFloat(x::Union{UInt8,UInt16,UInt32}, r::MPFRRoundingMode=ROUNDING_MODE[]; pre
310311
BigFloat(x::Union{Float16,Float32}, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[]) =
311312
BigFloat(Float64(x), r; precision=precision)
312313

313-
function BigFloat(x::Rational, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[])
314-
setprecision(BigFloat, precision) do
315-
setrounding_raw(BigFloat, r) do
316-
BigFloat(numerator(x))::BigFloat / BigFloat(denominator(x))::BigFloat
314+
function set_2exp!(z::BigFloat, n::BigInt, exp::Int, rm::MPFRRoundingMode)
315+
ccall(
316+
(:mpfr_set_z_2exp, libmpfr),
317+
Int32,
318+
(Ref{BigFloat}, Ref{BigInt}, Int, MPFRRoundingMode),
319+
z, n, exp, rm,
320+
)
321+
nothing
322+
end
323+
324+
function RationalToFloat.to_floating_point_impl(::Type{BigFloat}, ::Type{BigInt}, num, den, romo, prec)
325+
num_is_zero = iszero(num)
326+
den_is_zero = iszero(den)
327+
s = Int8(sign(num))
328+
sb = signbit(s)
329+
is_zero = num_is_zero & !den_is_zero
330+
is_inf = !num_is_zero & den_is_zero
331+
is_regular = !num_is_zero & !den_is_zero
332+
333+
if is_regular
334+
let rtfc = RationalToFloat.to_float_components
335+
c = rtfc(BigInt, num, den, prec, nothing, romo, sb)
336+
ret = BigFloat(precision = prec)
337+
mpfr_romo = convert(MPFRRoundingMode, romo)
338+
set_2exp!(ret, s * c.integral_significand, Int(c.exponent - prec + true), mpfr_romo)
339+
ret
317340
end
318-
end
341+
else
342+
if is_zero
343+
BigFloat(false, MPFRRoundToZero, precision = prec)
344+
elseif is_inf
345+
BigFloat(s * Inf, MPFRRoundToZero, precision = prec)
346+
else
347+
BigFloat(precision = prec)
348+
end
349+
end::BigFloat
350+
end
351+
352+
function BigFloat(x::Rational, r::RoundingMode; precision::Integer = DEFAULT_PRECISION[])
353+
rational_to_floating_point(BigFloat, x, r, precision)
354+
end
355+
356+
function BigFloat(x::Rational, r::MPFRRoundingMode = ROUNDING_MODE[];
357+
precision::Integer = DEFAULT_PRECISION[])
358+
rational_to_floating_point(BigFloat, x, r, precision)
319359
end
320360

321361
function tryparse(::Type{BigFloat}, s::AbstractString; base::Integer=0, precision::Integer=DEFAULT_PRECISION[], rounding::MPFRRoundingMode=ROUNDING_MODE[])

base/rational.jl

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -140,10 +140,21 @@ Bool(x::Rational) = x==0 ? false : x==1 ? true :
140140
(::Type{T})(x::Rational) where {T<:Integer} = (isinteger(x) ? convert(T, x.num)::T :
141141
throw(InexactError(nameof(T), T, x)))
142142

143-
AbstractFloat(x::Rational) = (float(x.num)/float(x.den))::AbstractFloat
144-
function (::Type{T})(x::Rational{S}) where T<:AbstractFloat where S
145-
P = promote_type(T,S)
146-
convert(T, convert(P,x.num)/convert(P,x.den))::T
143+
function numerator_denominator_promoted(x)
144+
y = unsafe_rational(numerator(x), denominator(x))
145+
(numerator(y), denominator(y))
146+
end
147+
148+
function rational_to_floating_point(::Type{F}, x, rm = RoundNearest, prec = precision(F)) where {F}
149+
nd = numerator_denominator_promoted(x)
150+
RationalToFloat.to_floating_point(F, nd..., rm, prec)::F
151+
end
152+
153+
(::Type{F})(x::Rational) where {F<:AbstractFloat} = rational_to_floating_point(F, x)::F
154+
155+
function AbstractFloat(x::Q) where {Q<:Rational}
156+
T = float(Q)
157+
T(x)::T::AbstractFloat
147158
end
148159

149160
function Rational{T}(x::AbstractFloat) where T<:Integer

base/rational_to_float.jl

Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
# This file is a part of Julia. License is MIT: https://julialang.org/license
2+
3+
module RationalToFloat
4+
5+
const Rnd = Base.Rounding
6+
7+
# Performance optimization. Unlike raw `<<` or `>>>`, this is supposed
8+
# to compile to a single instruction, because the semantics correspond
9+
# to what hardware usually provides.
10+
function machine_shift(shift::S, a::T, b) where {S,T<:Base.BitInteger}
11+
@inline begin
12+
mask = 8*sizeof(T) - 1
13+
c = b & mask
14+
shift(a, c)
15+
end
16+
end
17+
18+
machine_shift(::S, a::Bool, ::Any) where {S} = error("unsupported")
19+
20+
# Fallback for `BigInt` etc.
21+
machine_shift(shift::S, a, b) where {S} = shift(a, b)
22+
23+
# Arguments are positive integers.
24+
function div_significand_with_remainder(num, den, minimum_significand_size)
25+
clamped = x -> max(zero(x), x)::typeof(x)
26+
bw = Base.top_set_bit # bit width
27+
shift = clamped(minimum_significand_size + bw(den) - bw(num) + 0x2)
28+
t = machine_shift(<<, num, shift)
29+
(divrem(t, den, RoundToZero)..., shift)
30+
end
31+
32+
# `divrem(n, 1<<k, RoundToZero)`
33+
function divrem_2(n, k)
34+
quo = machine_shift(>>>, n, k)
35+
tmp = machine_shift(<<, quo, k)
36+
rem = n - tmp
37+
(quo, rem)
38+
end
39+
40+
function to_float_components_impl(num, den, precision, max_subnormal_exp)
41+
# `+1` because we need an extra, "round", bit for some rounding modes.
42+
#
43+
# TODO: as a performance optimization, only do this when required
44+
# by the rounding mode
45+
prec_p_1 = precision + true
46+
47+
(quo0, rem0, shift) = div_significand_with_remainder(num, den, prec_p_1)
48+
width = Base.top_set_bit(quo0)
49+
excess_width = width - prec_p_1
50+
exp = width - shift - true
51+
52+
exp_underflow = if isnothing(max_subnormal_exp)
53+
zero(exp)
54+
else
55+
let d = max_subnormal_exp - exp, T = typeof(d), z = zero(d)::T
56+
(signbit(d) ? z : d + true)::T
57+
end
58+
end
59+
60+
(quo1, rem1) = divrem_2(quo0, exp_underflow + excess_width)
61+
(quo2, rem2) = (quo1 >>> true, quo1 % Bool)
62+
63+
round_bit = rem2
64+
sticky_bit = !iszero(rem1) | !iszero(rem0)
65+
66+
(; integral_significand = quo2, exponent = exp, round_bit, sticky_bit)
67+
end
68+
69+
struct RoundingIncrementHelper
70+
final_bit::Bool
71+
round_bit::Bool
72+
sticky_bit::Bool
73+
end
74+
75+
(h::RoundingIncrementHelper)(::Rnd.FinalBit) = h.final_bit
76+
(h::RoundingIncrementHelper)(::Rnd.RoundBit) = h.round_bit
77+
(h::RoundingIncrementHelper)(::Rnd.StickyBit) = h.sticky_bit
78+
79+
function to_float_components_rounded(num, den, precision, max_subnormal_exp, romo, sign_bit)
80+
# Helps ensure type stability
81+
fun = function(f::F, v) where {F}
82+
r = f(v)
83+
T = typeof(r)
84+
(T(v)::T, r)::NTuple{2,T}
85+
end
86+
87+
fun_sig = x -> x >>> true
88+
fun_exp = x -> x + true
89+
overflows = (x, p) -> x == machine_shift(<<, one(x), p)
90+
91+
t = to_float_components_impl(num, den, precision, max_subnormal_exp)
92+
raw_significand = t.integral_significand
93+
rh = RoundingIncrementHelper(raw_significand % Bool, t.round_bit, t.sticky_bit)
94+
incr = Rnd.correct_rounding_requires_increment(rh, romo, sign_bit)
95+
rounded = raw_significand + incr
96+
(integral_significand, exponent) = let exp = t.exponent, s01, e01, se0, se1, T
97+
s01 = fun(fun_sig, rounded)
98+
e01 = fun(fun_exp, exp)
99+
se0 = (first(s01), first(e01))
100+
se1 = ( last(s01), last(e01))
101+
T = typeof(se0)
102+
(overflows(rounded, precision) ? se1 : se0)::T
103+
end
104+
(; integral_significand, exponent)
105+
end
106+
107+
function to_float_components(::Type{T}, num, den, precision, max_subnormal_exp, romo, sb) where {T}
108+
to_float_components_rounded(abs(T(num)), den, precision, max_subnormal_exp, romo, sb)
109+
end
110+
111+
function to_floating_point_fallback(::Type{T}, ::Type{S}, num, den, rm, prec) where {T,S}
112+
num_is_zero = iszero(num)
113+
den_is_zero = iszero(den)
114+
sb = signbit(num)
115+
is_zero = num_is_zero & !den_is_zero
116+
is_inf = !num_is_zero & den_is_zero
117+
is_regular = !num_is_zero & !den_is_zero
118+
if is_regular
119+
let
120+
c = to_float_components(S, num, den, prec, nothing, rm, sb)
121+
exp = c.exponent
122+
signif = T(c.integral_significand)::T
123+
let x = ldexp(signif, exp - prec + true)::T
124+
sb ? -x : x
125+
end::T
126+
end
127+
else
128+
if is_zero
129+
zero(T)::T
130+
elseif is_inf
131+
T(Inf)::T
132+
else
133+
T(NaN)::T
134+
end
135+
end::T
136+
end
137+
138+
function to_floating_point_impl(::Type{T}, ::Type{S}, num, den, rm, prec) where {T,S}
139+
to_floating_point_fallback(T, S, num, den, rm, prec)
140+
end
141+
142+
function to_floating_point_impl(::Type{T}, ::Type{S}, num, den, rm, prec) where {T<:Base.IEEEFloat,S}
143+
num_is_zero = iszero(num)
144+
den_is_zero = iszero(den)
145+
sb = signbit(num)
146+
is_zero = num_is_zero & !den_is_zero
147+
is_inf = !num_is_zero & den_is_zero
148+
is_regular = !num_is_zero & !den_is_zero
149+
(rm_is_to_zero, rm_is_from_zero) = if Rnd.rounds_to_nearest(rm)
150+
(false, false)
151+
else
152+
let from = Rnd.rounds_away_from_zero(rm, sb)
153+
(!from, from)
154+
end
155+
end::NTuple{2,Bool}
156+
exp_max = Base.exponent_max(T)
157+
exp_min = Base.exponent_min(T)
158+
ieee_repr = Base.ieee754_representation
159+
repr_zero = ieee_repr(T, sb, Val(:zero))
160+
repr_inf = ieee_repr(T, sb, Val(:inf))
161+
repr_nan = ieee_repr(T, sb, Val(:nan))
162+
U = typeof(repr_zero)
163+
repr_zero::U
164+
repr_inf::U
165+
repr_nan::U
166+
167+
ret_u = if is_regular
168+
let
169+
c = let e = exp_min - 1
170+
to_float_components(S, num, den, prec, e, rm, sb)
171+
end
172+
exp = c.exponent
173+
exp_diff = exp - exp_min
174+
is_normal = 0 exp_diff
175+
exp_is_huge_p = exp_max < exp
176+
exp_is_huge_n = signbit(exp_diff + prec)
177+
rounds_to_inf = exp_is_huge_p & !rm_is_to_zero
178+
rounds_to_zero = exp_is_huge_n & !rm_is_from_zero
179+
180+
if !rounds_to_zero & !exp_is_huge_p
181+
let signif = (c.integral_significand % U) & Base.significand_mask(T)
182+
exp_field = (max(exp_diff, zero(exp_diff)) + is_normal) % U
183+
ieee_repr(T, sb, exp_field, signif)::U
184+
end
185+
elseif rounds_to_zero
186+
repr_zero
187+
elseif rounds_to_inf
188+
repr_inf
189+
else
190+
ieee_repr(T, sb, Val(:omega))
191+
end
192+
end
193+
else
194+
if is_zero
195+
repr_zero
196+
elseif is_inf
197+
repr_inf
198+
else
199+
repr_nan
200+
end
201+
end::U
202+
203+
reinterpret(T, ret_u)::T
204+
end
205+
206+
# `BigInt` is a safe default.
207+
to_float_promote_type(::Type{F}, ::Type{S}) where {F,S} = BigInt
208+
209+
const BitIntegerOrBool = Union{Bool,Base.BitInteger}
210+
211+
# As an optimization, use an integer type narrower than `BigInt` when possible.
212+
function to_float_promote_type(::Type{F}, ::Type{S}) where {F<:Base.IEEEFloat,S<:BitIntegerOrBool}
213+
Max = if sizeof(F) sizeof(S)
214+
S
215+
else
216+
(S <: Signed) ? Base.inttype(F) : Base.uinttype(F)
217+
end
218+
widen(Max)
219+
end
220+
221+
function to_floating_point(::Type{F}, num::T, den::T, rm, prec) where {F,T}
222+
S = to_float_promote_type(F, T)
223+
to_floating_point_impl(F, S, num, den, rm, prec)
224+
end
225+
226+
end

test/choosetests.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ const TESTNAMES = [
1111
"char", "strings", "triplequote", "unicode", "intrinsics",
1212
"dict", "hashing", "iobuffer", "staged", "offsetarray",
1313
"arrayops", "tuple", "reduce", "reducedim", "abstractarray",
14-
"intfuncs", "simdloop", "vecelement", "rational",
14+
"intfuncs", "simdloop", "vecelement", "rational", "rational_to_float",
1515
"bitarray", "copy", "math", "fastmath", "functional", "iterators",
1616
"operators", "ordering", "path", "ccall", "parse", "loading", "gmp",
1717
"sorting", "spawn", "backtrace", "exceptions",

0 commit comments

Comments
 (0)