|
| 1 | +# This file is a part of Julia. License is MIT: https://julialang.org/license |
| 2 | + |
| 3 | +module RationalToFloat |
| 4 | + |
| 5 | +const Rnd = Base.Rounding |
| 6 | + |
| 7 | +# Performance optimization. Unlike raw `<<` or `>>>`, this is supposed |
| 8 | +# to compile to a single instruction, because the semantics correspond |
| 9 | +# to what hardware usually provides. |
| 10 | +function machine_shift(shift::S, a::T, b) where {S,T<:Base.BitInteger} |
| 11 | + @inline begin |
| 12 | + mask = 8*sizeof(T) - 1 |
| 13 | + c = b & mask |
| 14 | + shift(a, c) |
| 15 | + end |
| 16 | +end |
| 17 | + |
| 18 | +machine_shift(::S, a::Bool, ::Any) where {S} = error("unsupported") |
| 19 | + |
| 20 | +# Fallback for `BigInt` etc. |
| 21 | +machine_shift(shift::S, a, b) where {S} = shift(a, b) |
| 22 | + |
| 23 | +# Arguments are positive integers. |
| 24 | +function div_significand_with_remainder(num, den, minimum_significand_size) |
| 25 | + clamped = x -> max(zero(x), x)::typeof(x) |
| 26 | + bw = Base.top_set_bit # bit width |
| 27 | + shift = clamped(minimum_significand_size + bw(den) - bw(num) + 0x2) |
| 28 | + t = machine_shift(<<, num, shift) |
| 29 | + (divrem(t, den, RoundToZero)..., shift) |
| 30 | +end |
| 31 | + |
| 32 | +# `divrem(n, 1<<k, RoundToZero)` |
| 33 | +function divrem_2(n, k) |
| 34 | + quo = machine_shift(>>>, n, k) |
| 35 | + tmp = machine_shift(<<, quo, k) |
| 36 | + rem = n - tmp |
| 37 | + (quo, rem) |
| 38 | +end |
| 39 | + |
| 40 | +function to_float_components_impl(num, den, precision, max_subnormal_exp) |
| 41 | + # `+1` because we need an extra, "round", bit for some rounding modes. |
| 42 | + # |
| 43 | + # TODO: as a performance optimization, only do this when required |
| 44 | + # by the rounding mode |
| 45 | + prec_p_1 = precision + true |
| 46 | + |
| 47 | + (quo0, rem0, shift) = div_significand_with_remainder(num, den, prec_p_1) |
| 48 | + width = Base.top_set_bit(quo0) |
| 49 | + excess_width = width - prec_p_1 |
| 50 | + exp = width - shift - true |
| 51 | + |
| 52 | + exp_underflow = if isnothing(max_subnormal_exp) |
| 53 | + zero(exp) |
| 54 | + else |
| 55 | + let d = max_subnormal_exp - exp, T = typeof(d), z = zero(d)::T |
| 56 | + (signbit(d) ? z : d + true)::T |
| 57 | + end |
| 58 | + end |
| 59 | + |
| 60 | + (quo1, rem1) = divrem_2(quo0, exp_underflow + excess_width) |
| 61 | + (quo2, rem2) = (quo1 >>> true, quo1 % Bool) |
| 62 | + |
| 63 | + round_bit = rem2 |
| 64 | + sticky_bit = !iszero(rem1) | !iszero(rem0) |
| 65 | + |
| 66 | + (; integral_significand = quo2, exponent = exp, round_bit, sticky_bit) |
| 67 | +end |
| 68 | + |
| 69 | +struct RoundingIncrementHelper |
| 70 | + final_bit::Bool |
| 71 | + round_bit::Bool |
| 72 | + sticky_bit::Bool |
| 73 | +end |
| 74 | + |
| 75 | +(h::RoundingIncrementHelper)(::Rnd.FinalBit) = h.final_bit |
| 76 | +(h::RoundingIncrementHelper)(::Rnd.RoundBit) = h.round_bit |
| 77 | +(h::RoundingIncrementHelper)(::Rnd.StickyBit) = h.sticky_bit |
| 78 | + |
| 79 | +function to_float_components_rounded(num, den, precision, max_subnormal_exp, romo, sign_bit) |
| 80 | + # Helps ensure type stability |
| 81 | + fun = function(f::F, v) where {F} |
| 82 | + r = f(v) |
| 83 | + T = typeof(r) |
| 84 | + (T(v)::T, r)::NTuple{2,T} |
| 85 | + end |
| 86 | + |
| 87 | + fun_sig = x -> x >>> true |
| 88 | + fun_exp = x -> x + true |
| 89 | + overflows = (x, p) -> x == machine_shift(<<, one(x), p) |
| 90 | + |
| 91 | + t = to_float_components_impl(num, den, precision, max_subnormal_exp) |
| 92 | + raw_significand = t.integral_significand |
| 93 | + rh = RoundingIncrementHelper(raw_significand % Bool, t.round_bit, t.sticky_bit) |
| 94 | + incr = Rnd.correct_rounding_requires_increment(rh, romo, sign_bit) |
| 95 | + rounded = raw_significand + incr |
| 96 | + (integral_significand, exponent) = let exp = t.exponent, s01, e01, se0, se1, T |
| 97 | + s01 = fun(fun_sig, rounded) |
| 98 | + e01 = fun(fun_exp, exp) |
| 99 | + se0 = (first(s01), first(e01)) |
| 100 | + se1 = ( last(s01), last(e01)) |
| 101 | + T = typeof(se0) |
| 102 | + (overflows(rounded, precision) ? se1 : se0)::T |
| 103 | + end |
| 104 | + (; integral_significand, exponent) |
| 105 | +end |
| 106 | + |
| 107 | +function to_float_components(::Type{T}, num, den, precision, max_subnormal_exp, romo, sb) where {T} |
| 108 | + to_float_components_rounded(abs(T(num)), den, precision, max_subnormal_exp, romo, sb) |
| 109 | +end |
| 110 | + |
| 111 | +function to_floating_point_fallback(::Type{T}, ::Type{S}, num, den, rm, prec) where {T,S} |
| 112 | + num_is_zero = iszero(num) |
| 113 | + den_is_zero = iszero(den) |
| 114 | + sb = signbit(num) |
| 115 | + is_zero = num_is_zero & !den_is_zero |
| 116 | + is_inf = !num_is_zero & den_is_zero |
| 117 | + is_regular = !num_is_zero & !den_is_zero |
| 118 | + if is_regular |
| 119 | + let |
| 120 | + c = to_float_components(S, num, den, prec, nothing, rm, sb) |
| 121 | + exp = c.exponent |
| 122 | + signif = T(c.integral_significand)::T |
| 123 | + let x = ldexp(signif, exp - prec + true)::T |
| 124 | + sb ? -x : x |
| 125 | + end::T |
| 126 | + end |
| 127 | + else |
| 128 | + if is_zero |
| 129 | + zero(T)::T |
| 130 | + elseif is_inf |
| 131 | + T(Inf)::T |
| 132 | + else |
| 133 | + T(NaN)::T |
| 134 | + end |
| 135 | + end::T |
| 136 | +end |
| 137 | + |
| 138 | +function to_floating_point_impl(::Type{T}, ::Type{S}, num, den, rm, prec) where {T,S} |
| 139 | + to_floating_point_fallback(T, S, num, den, rm, prec) |
| 140 | +end |
| 141 | + |
| 142 | +function to_floating_point_impl(::Type{T}, ::Type{S}, num, den, rm, prec) where {T<:Base.IEEEFloat,S} |
| 143 | + num_is_zero = iszero(num) |
| 144 | + den_is_zero = iszero(den) |
| 145 | + sb = signbit(num) |
| 146 | + is_zero = num_is_zero & !den_is_zero |
| 147 | + is_inf = !num_is_zero & den_is_zero |
| 148 | + is_regular = !num_is_zero & !den_is_zero |
| 149 | + (rm_is_to_zero, rm_is_from_zero) = if Rnd.rounds_to_nearest(rm) |
| 150 | + (false, false) |
| 151 | + else |
| 152 | + let from = Rnd.rounds_away_from_zero(rm, sb) |
| 153 | + (!from, from) |
| 154 | + end |
| 155 | + end::NTuple{2,Bool} |
| 156 | + exp_max = Base.exponent_max(T) |
| 157 | + exp_min = Base.exponent_min(T) |
| 158 | + ieee_repr = Base.ieee754_representation |
| 159 | + repr_zero = ieee_repr(T, sb, Val(:zero)) |
| 160 | + repr_inf = ieee_repr(T, sb, Val(:inf)) |
| 161 | + repr_nan = ieee_repr(T, sb, Val(:nan)) |
| 162 | + U = typeof(repr_zero) |
| 163 | + repr_zero::U |
| 164 | + repr_inf::U |
| 165 | + repr_nan::U |
| 166 | + |
| 167 | + ret_u = if is_regular |
| 168 | + let |
| 169 | + c = let e = exp_min - 1 |
| 170 | + to_float_components(S, num, den, prec, e, rm, sb) |
| 171 | + end |
| 172 | + exp = c.exponent |
| 173 | + exp_diff = exp - exp_min |
| 174 | + is_normal = 0 ≤ exp_diff |
| 175 | + exp_is_huge_p = exp_max < exp |
| 176 | + exp_is_huge_n = signbit(exp_diff + prec) |
| 177 | + rounds_to_inf = exp_is_huge_p & !rm_is_to_zero |
| 178 | + rounds_to_zero = exp_is_huge_n & !rm_is_from_zero |
| 179 | + |
| 180 | + if !rounds_to_zero & !exp_is_huge_p |
| 181 | + let signif = (c.integral_significand % U) & Base.significand_mask(T) |
| 182 | + exp_field = (max(exp_diff, zero(exp_diff)) + is_normal) % U |
| 183 | + ieee_repr(T, sb, exp_field, signif)::U |
| 184 | + end |
| 185 | + elseif rounds_to_zero |
| 186 | + repr_zero |
| 187 | + elseif rounds_to_inf |
| 188 | + repr_inf |
| 189 | + else |
| 190 | + ieee_repr(T, sb, Val(:omega)) |
| 191 | + end |
| 192 | + end |
| 193 | + else |
| 194 | + if is_zero |
| 195 | + repr_zero |
| 196 | + elseif is_inf |
| 197 | + repr_inf |
| 198 | + else |
| 199 | + repr_nan |
| 200 | + end |
| 201 | + end::U |
| 202 | + |
| 203 | + reinterpret(T, ret_u)::T |
| 204 | +end |
| 205 | + |
| 206 | +# `BigInt` is a safe default. |
| 207 | +to_float_promote_type(::Type{F}, ::Type{S}) where {F,S} = BigInt |
| 208 | + |
| 209 | +const BitIntegerOrBool = Union{Bool,Base.BitInteger} |
| 210 | + |
| 211 | +# As an optimization, use an integer type narrower than `BigInt` when possible. |
| 212 | +function to_float_promote_type(::Type{F}, ::Type{S}) where {F<:Base.IEEEFloat,S<:BitIntegerOrBool} |
| 213 | + Max = if sizeof(F) ≤ sizeof(S) |
| 214 | + S |
| 215 | + else |
| 216 | + (S <: Signed) ? Base.inttype(F) : Base.uinttype(F) |
| 217 | + end |
| 218 | + widen(Max) |
| 219 | +end |
| 220 | + |
| 221 | +function to_floating_point(::Type{F}, num::T, den::T, rm, prec) where {F,T} |
| 222 | + S = to_float_promote_type(F, T) |
| 223 | + to_floating_point_impl(F, S, num, den, rm, prec) |
| 224 | +end |
| 225 | + |
| 226 | +end |
0 commit comments