@@ -129,12 +129,183 @@ Bool(x::Rational) = x==0 ? false : x==1 ? true :
129
129
(:: Type{T} )(x:: Rational ) where {T<: Integer } = (isinteger (x) ? convert (T, x. num):: T :
130
130
throw (InexactError (nameof (T), T, x)))
131
131
132
- AbstractFloat (x:: Rational ) = (float (x. num)/ float (x. den)):: AbstractFloat
133
- function (:: Type{T} )(x:: Rational{S} ) where T<: AbstractFloat where S
134
- P = promote_type (T,S)
135
- convert (T, convert (P,x. num)/ convert (P,x. den)):: T
132
+ bit_width (n) = ndigits (n, base = UInt8 (2 ), pad = false )
133
+
134
+ const RoundingModeIndependentFromSign = Union{
135
+ RoundingMode{:ToZero }, RoundingMode{:FromZero },
136
+ RoundingMode{:Nearest }, RoundingMode{:NearestTiesAway }}
137
+
138
+ clamped_to_zero (x) = max (zero (x), x)
139
+
140
+ struct FloatComponentsResult{S<: Integer ,T<: Integer }
141
+ mantissa:: S
142
+ exponent:: T
143
+ inexact:: Bool
144
+ underflowed:: Bool
145
+
146
+ function FloatComponentsResult (m:: S , e:: T ;
147
+ inexact:: Bool ,
148
+ underflowed:: Bool ) where {S<: Integer ,T<: Integer }
149
+ new {S,T} (m, e, inexact, underflowed)
150
+ end
151
+ end
152
+
153
+ # `num`, `den` are positive integers. `requested_precision` is the
154
+ # requested floating-point precision. `T` is the integer type that
155
+ # we'll be working with mostly, it needs to be wide enough.
156
+ # `exp_lower_bound` is the minimum allowed normalized exponent for
157
+ # normal numbers.
158
+ function rational_to_float_components_impl (num:: Integer ,
159
+ den:: Integer ,
160
+ requested_precision:: Integer ,
161
+ :: Type{T} ,
162
+ romo:: RoundingModeIndependentFromSign ,
163
+ exp_lower_bound:: Union{Nothing,Integer} ) where {T<: Integer }
164
+ num_bit_width = bit_width (num)
165
+ den_bit_width = bit_width (den)
166
+
167
+ # `T` must be wide enough to avoid overflow.
168
+ numT = T (num)
169
+ denT = T (den)
170
+
171
+ # Creates a mantissa in `quo_` that's at least
172
+ # `requested_precision` bits wide.
173
+ bit_shift_num_ = clamped_to_zero (den_bit_width - num_bit_width + requested_precision)
174
+ quo_ = div (numT << bit_shift_num_, den, RoundToZero)
175
+
176
+ # Nonnegative. Experiments indicate that, when iterating over all
177
+ # possible numerators and denominators below some bit width, with
178
+ # some fixed value for `requested_precision`, the maximal attained
179
+ # value will be
180
+ # `max(1, max(num_bit_width, den_bit_width) - requested_precision)`.
181
+ # So in the worst case we have `requested_precision == 1` and
182
+ # `excess_precision == max(1, max(num_bit_width, den_bit_width) - 1)`
183
+ excess_precision = bit_width (quo_) - requested_precision
184
+
185
+ # Normalized exponent
186
+ nexp = bit_width (quo_) - bit_shift_num_ - true
187
+
188
+ # Take possible underflow into account: if there's underflow, the
189
+ # precision needs to be less than requested.
190
+ adjusted_precision = requested_precision
191
+ if ! isnothing (exp_lower_bound)
192
+ underflow = clamped_to_zero (exp_lower_bound - nexp)
193
+ adjusted_precision -= underflow
194
+ end
195
+ adjusted_excess_precision = bit_width (quo_) - adjusted_precision
196
+
197
+ (adjusted_precision < 0 ) && return FloatComponentsResult (
198
+ zero (quo_), zero (nexp), inexact = true , underflowed = true )
199
+
200
+ # Creates a mantissa in `quo` that's exactly `adjusted_precision`
201
+ # bits wide, except if rounding up happens, in which case the bit
202
+ # width may be `adjusted_precision + 1`.
203
+ bit_shift_num = clamped_to_zero (bit_shift_num_ - adjusted_excess_precision)
204
+ bit_shift_den = adjusted_excess_precision - (bit_shift_num_ - bit_shift_num) # nonnegative
205
+ (quo, rem) = divrem (numT << bit_shift_num, denT << bit_shift_den, romo)
206
+
207
+ result_is_exact = iszero (rem)
208
+
209
+ nexp_final = nexp + (adjusted_precision < bit_width (quo))
210
+ is_subnormal = ! isnothing (exp_lower_bound) && (nexp_final < exp_lower_bound)
211
+
212
+ iszero (quo) || (quo >>= trailing_zeros (quo))
213
+
214
+ # The bit width of `quo` is now less than or equal to
215
+ # `adjusted_precision`, except if `adjusted_precision` is zero, in
216
+ # which case `bit_width(quo)` may be one, in case rounding up
217
+ # happened.
218
+
219
+ FloatComponentsResult (
220
+ quo, nexp_final, inexact = ! result_is_exact, underflowed = is_subnormal)
221
+ end
222
+
223
+ # `num`, `den` are positive integers. `bit_width` is the requested
224
+ # floating-point precision and must be positive. `T` is the integer
225
+ # type that we'll be working with mostly, it needs to be wide enough.
226
+ # `exp_lower_bound` is the minimum allowed normalized exponent for
227
+ # normal numbers.
228
+ function rational_to_float_components (num:: Integer ,
229
+ den:: Integer ,
230
+ bit_width:: Integer ,
231
+ :: Type{T} ,
232
+ romo:: RoundingModeIndependentFromSign ,
233
+ exp_lower_bound:: Union{Nothing,Integer} = nothing ) where {T<: Integer }
234
+ # Factor out powers of two
235
+ tz_num = trailing_zeros (num)
236
+ tz_den = trailing_zeros (den)
237
+ dexp = tz_num - tz_den
238
+ exp_lb = isnothing (exp_lower_bound) ? nothing : exp_lower_bound - dexp
239
+ c = rational_to_float_components_impl (
240
+ num >> tz_num, den >> tz_den, bit_width, T, romo, exp_lb)
241
+ FloatComponentsResult (
242
+ c. mantissa, c. exponent + dexp, inexact = c. inexact, underflowed = c. underflowed)
243
+ end
244
+
245
+ # Assuming the wanted rounding mode is round to nearest with ties to
246
+ # even.
247
+ #
248
+ # `precision` must be positive.
249
+ function rational_to_float_impl (to_float:: C ,
250
+ :: Type{T} ,
251
+ x:: Rational ,
252
+ precision:: Integer ) where {C<: Union{Type,Function} ,T<: Integer }
253
+ s = Int8 (sign (numerator (x)))
254
+ a = abs (x)
255
+
256
+ num = numerator (a)
257
+ den = denominator (a)
258
+
259
+ # Handle special cases
260
+ num_is_zero = iszero (num)
261
+ den_is_zero = iszero (den)
262
+ if den_is_zero
263
+ num_is_zero && return to_float (NaN ) # 0/0
264
+ return to_float (s * Inf ) # n/0 = sign(n) * Inf
265
+ end
266
+ num_is_zero && return to_float (false ) # 0/n = 0
267
+
268
+ F = typeof (to_float (false ))
269
+ c = rational_to_float_components (
270
+ num, den, precision, T, RoundNearest, min_normal_number_exponent (F))
271
+ bw = bit_width (c. mantissa)
272
+
273
+ # TODO : `ldexp` could be replaced with a mere bit of bit twiddling
274
+ # in the case of `Float16`, `Float32`, `Float64`
275
+ ldexp (s * to_float (c. mantissa), c. exponent - bw + true ):: F
136
276
end
137
277
278
+ function rational_to_float_promote_type (:: Type{F} ,
279
+ :: Type{S} ) where {F<: AbstractFloat ,S<: Integer }
280
+ BigInt
281
+ end
282
+
283
+ function rational_to_float_promote_type (:: Type{F} ,
284
+ :: Type{S} ) where {F<: AbstractFloat ,S<: Unsigned }
285
+ rational_to_float_promote_type (F, widen (signed (S)))
286
+ end
287
+
288
+ # As an optimization, use a narrower type when possible.
289
+ rational_to_float_promote_type (:: Type{Float16} , :: Type{<:Union{Int8,Int16}} ) = Int32
290
+ rational_to_float_promote_type (:: Type{Float32} , :: Type{<:Union{Int8,Int16}} ) = Int64
291
+ rational_to_float_promote_type (:: Type{Float64} , :: Type{<:Union{Int8,Int16}} ) = Int128
292
+ rational_to_float_promote_type (:: Type{<:Union{Float16,Float32}} , :: Type{Int32} ) = Int64
293
+ rational_to_float_promote_type (:: Type{Float64} , :: Type{Int32} ) = Int128
294
+ rational_to_float_promote_type (:: Type{<:Union{Float16,Float32,Float64}} , :: Type{Int64} ) = Int128
295
+
296
+ rational_to_float (:: Type{F} , x:: Rational{Bool} ) where {F<: AbstractFloat } = F (numerator (x)/ denominator (x))
297
+
298
+ function rational_to_float (:: Type{F} , x:: Rational{T} ) where {F<: AbstractFloat ,T<: Integer }
299
+ rational_to_float_impl (F, rational_to_float_promote_type (F, T), x, precision (F)):: F
300
+ end
301
+
302
+ function (:: Type{F} )(x:: Rational ) where {F<: AbstractFloat }
303
+ y = unsafe_rational (numerator (x), denominator (x))
304
+ rational_to_float (F, y):: F
305
+ end
306
+
307
+ AbstractFloat (x:: Q ) where {Q<: Rational } = float (Q)(x):: AbstractFloat
308
+
138
309
function Rational {T} (x:: AbstractFloat ) where T<: Integer
139
310
r = rationalize (T, x, tol= 0 )
140
311
x == convert (typeof (x), r) || throw (InexactError (:Rational , Rational{T}, x))
0 commit comments