WIP: use bit twiddling instead of ldexp for IEEE floats

nsajko · nsajko · commit 50d5aa8aab6b · 2023-05-12T14:57:46.000+02:00
diff --git a/base/float.jl b/base/float.jl
@@ -148,6 +148,67 @@ signed integer, so that `abs(typemin(x)) == typemin(x) < 0`, in which case the r
 uabs(x::Integer) = abs(x)
 uabs(x::BitSigned) = unsigned(abs(x))
 
+function float_representation(
+    ::Type{F},
+    signbit::Bool, exponent_field::Integer, mantissa_field::Integer,
+) where {F<:IEEEFloat}
+    T = uinttype(F)
+    sign_and_exp = (T(signbit) << exponent_bits(F)) | T(exponent_field)
+    ret = (sign_and_exp << significand_bits(F)) | T(mantissa_field)
+    ret::T
+end
+
+float_representation_of_infinity(::Type{F}, signbit::Bool) where {F<:IEEEFloat} =
+    float_representation(F, signbit, exponent_raw_max(F), false)
+
+float_representation_of_zero(::Type{F}, signbit::Bool) where {F<:IEEEFloat} =
+    float_representation(F, signbit, false, false)
+
+function float_representation_from_components(
+    ::Type{F},
+    sign::Real, exp::Integer, mantissa::Integer,
+) where {F<:IEEEFloat}
+    T = uinttype(F)
+    sb = signbit(sign)
+
+    iszero(sign) && return float_representation_of_zero(F, sb)
+
+    normalized_exp = exp + significand_bits(F)
+
+    if exponent_max(F) < normalized_exp
+        # overflow (infinity)
+        float_representation_of_infinity(F, sb)
+    elseif normalized_exp < true - exponent_bias(F)
+        # underflow (subnormal or zero)
+        ed = true - exponent_bias(F) - normalized_exp
+        float_representation(F, sb, false, mantissa >> ed)
+    else
+        # normal: `true - exponent_bias(F) ≤ normalized_exp ≤ exponent_max(F)`
+        mantissa_field = T(mantissa) & significand_mask(F)  # clear the leading set bit
+        e = normalized_exp + exponent_bias(F)
+        float_representation(F, sb, e, mantissa_field)
+    end
+end
+
+float_from_components(
+    ::Type{F},
+    sign::Real, exp::Integer, mantissa::Integer,
+) where {F<:IEEEFloat} =
+    reinterpret(F, float_representation_from_components(F, sign, exp, mantissa))
+
+# The input parameters represent the number `sign * 2^exp * mantissa`,
+# let's call it `n`. The sign is expected to be an integer between `-1`
+# and `1`, and the mantissa is expected to be as wide as the mantissa
+# of the floating-point type `F`, not counting the leading bit. Let's
+# call the number `x`.
+#
+# Returns the same value as `ldexp(sign * F(mantissa), exp)`
+float_from_components(
+    ::Type{F},
+    sign::Real, exp::Integer, mantissa::Integer,
+) where {F<:AbstractFloat} =
+    ldexp(sign * F(mantissa), exp)
+
 ## conversions to floating-point ##
 
 # TODO: deprecate in 2.0
diff --git a/base/rational.jl b/base/rational.jl
@@ -360,11 +360,10 @@ function rational_to_float_impl(
       T,
       RoundNearest,
     )
-    mantissa = to_float(components.mantissa)
-
-    # TODO: `ldexp` could be replaced with a mere bit of bit twiddling
-    # in the case of `Float16`, `Float32`, `Float64`
-    ret = ldexp(s * mantissa, components.exponent)
+    ret = float_from_components(
+        typeof(to_float(false)),
+        s, components.exponent, components.mantissa,
+    )
 
     # TODO: faster?
     if iszero(ret) | issubnormal(ret)
@@ -377,11 +376,10 @@ function rational_to_float_impl(
           T,
           RoundToZero,
         )
-        mantissa = to_float(components.mantissa | !components.is_exact)
-
-        # TODO: `ldexp` could be replaced with a mere bit of bit
-        # twiddling in the case of `Float16`, `Float32`, `Float64`
-        ret = ldexp(s * mantissa, components.exponent)
+        ret = float_from_components(
+            typeof(to_float(false)),
+            s, components.exponent, components.mantissa | !components.is_exact,
+        )
     end
 
     ret