@@ -129,12 +129,182 @@ Bool(x::Rational) = x==0 ? false : x==1 ? true :
129
129
(:: Type{T} )(x:: Rational ) where {T<: Integer } = (isinteger (x) ? convert (T, x. num):: T :
130
130
throw (InexactError (nameof (T), T, x)))
131
131
132
- AbstractFloat (x:: Rational ) = (float (x. num)/ float (x. den)):: AbstractFloat
133
- function (:: Type{T} )(x:: Rational{S} ) where T<: AbstractFloat where S
134
- P = promote_type (T,S)
135
- convert (T, convert (P,x. num)/ convert (P,x. den)):: T
132
+ bit_width (n) = ndigits (n, base = UInt8 (2 ), pad = false )
133
+
134
+ const RoundingModeIndependentFromSign = Union{
135
+ RoundingMode{:ToZero }, RoundingMode{:FromZero },
136
+ RoundingMode{:Nearest }, RoundingMode{:NearestTiesAway }}
137
+
138
+ clamped_to_zero (x) = max (zero (x), x)
139
+
140
+ struct FloatComponentsResult{S<: Integer ,T<: Integer }
141
+ mantissa:: S
142
+ exponent:: T
143
+ inexact:: Bool
144
+ underflowed:: Bool
145
+
146
+ function FloatComponentsResult (m:: S , e:: T ;
147
+ inexact:: Bool ,
148
+ underflowed:: Bool ) where {S<: Integer ,T<: Integer }
149
+ new {S,T} (m, e, inexact, underflowed)
150
+ end
151
+ end
152
+
153
+ # `num`, `den` are positive integers. `requested_precision` is the
154
+ # requested floating-point precision. `T` is the integer type that
155
+ # we'll be working with mostly, it needs to be wide enough.
156
+ # `exp_lower_bound` is the minimum allowed normalized exponent for
157
+ # normal numbers.
158
+ function rational_to_float_components_impl (num:: Integer ,
159
+ den:: Integer ,
160
+ requested_precision:: Integer ,
161
+ :: Type{T} ,
162
+ romo:: RoundingModeIndependentFromSign ,
163
+ exp_lower_bound:: Union{Nothing,Integer} ) where {T<: Integer }
164
+ num_bit_width = bit_width (num)
165
+ den_bit_width = bit_width (den)
166
+
167
+ # `T` must be wide enough to avoid overflow.
168
+ numT = T (num)
169
+ denT = T (den)
170
+
171
+ # Creates a mantissa in `quo_` that's at least
172
+ # `requested_precision` bits wide.
173
+ bit_shift_num_ = clamped_to_zero (den_bit_width - num_bit_width + requested_precision)
174
+ quo_ = div (numT << bit_shift_num_, den, RoundToZero)
175
+
176
+ # Normalized exponent
177
+ nexp = bit_width (quo_) - bit_shift_num_ - true
178
+
179
+ # Take possible underflow into account: if there's underflow, the
180
+ # precision needs to be less than requested.
181
+ adjusted_precision = requested_precision
182
+ if ! isnothing (exp_lower_bound)
183
+ underflow = clamped_to_zero (exp_lower_bound - nexp)
184
+ adjusted_precision -= underflow
185
+ end
186
+
187
+ # Nonnegative. Experiments indicate that, when iterating over all
188
+ # possible numerators and denominators below some bit width, with
189
+ # some fixed value for `adjusted_precision`, the maximal attained
190
+ # value will be
191
+ # `max(1, max(num_bit_width, den_bit_width) - adjusted_precision)`.
192
+ # So in the worst case we have `adjusted_precision == 1` and
193
+ # `excess_precision == max(1, max(num_bit_width, den_bit_width) - 1)`
194
+ excess_precision = bit_width (quo_) - adjusted_precision
195
+
196
+ (adjusted_precision < 0 ) && return FloatComponentsResult (
197
+ zero (quo_), zero (nexp), inexact = true , underflowed = true )
198
+
199
+ # Creates a mantissa in `quo` that's exactly `adjusted_precision`
200
+ # bits wide, except if rounding up happens, in which case the bit
201
+ # width may be `adjusted_precision + 1`.
202
+ bit_shift_num = clamped_to_zero (bit_shift_num_ - excess_precision)
203
+ bit_shift_den = excess_precision - (bit_shift_num_ - bit_shift_num) # nonnegative
204
+ (quo, rem) = divrem (numT << bit_shift_num, denT << bit_shift_den, romo)
205
+
206
+ result_is_exact = iszero (rem)
207
+
208
+ nexp_final = nexp + (adjusted_precision < bit_width (quo))
209
+ is_subnormal = ! isnothing (exp_lower_bound) && (nexp_final < exp_lower_bound)
210
+
211
+ iszero (quo) || (quo >>= trailing_zeros (quo))
212
+
213
+ # The bit width of `quo` is now less than or equal to
214
+ # `adjusted_precision`, except if `adjusted_precision` is zero, in
215
+ # which case `bit_width(quo)` may be one, in case rounding up
216
+ # happened.
217
+
218
+ FloatComponentsResult (
219
+ quo, nexp_final, inexact = ! result_is_exact, underflowed = is_subnormal)
220
+ end
221
+
222
+ # `num`, `den` are positive integers. `bit_width` is the requested
223
+ # floating-point precision and must be positive. `T` is the integer
224
+ # type that we'll be working with mostly, it needs to be wide enough.
225
+ # `exp_lower_bound` is the minimum allowed normalized exponent for
226
+ # normal numbers.
227
+ function rational_to_float_components (num:: Integer ,
228
+ den:: Integer ,
229
+ bit_width:: Integer ,
230
+ :: Type{T} ,
231
+ romo:: RoundingModeIndependentFromSign ,
232
+ exp_lower_bound:: Union{Nothing,Integer} = nothing ) where {T<: Integer }
233
+ # Factor out powers of two
234
+ tz_num = trailing_zeros (num)
235
+ tz_den = trailing_zeros (den)
236
+ dexp = tz_num - tz_den
237
+ exp_lb = isnothing (exp_lower_bound) ? nothing : exp_lower_bound - dexp
238
+ c = rational_to_float_components_impl (
239
+ num >> tz_num, den >> tz_den, bit_width, T, romo, exp_lb)
240
+ FloatComponentsResult (
241
+ c. mantissa, c. exponent + dexp, inexact = c. inexact, underflowed = c. underflowed)
242
+ end
243
+
244
+ # Assuming the wanted rounding mode is round to nearest with ties to
245
+ # even.
246
+ #
247
+ # `precision` must be positive.
248
+ function rational_to_float_impl (to_float:: C ,
249
+ :: Type{T} ,
250
+ x:: Rational ,
251
+ precision:: Integer ) where {C<: Union{Type,Function} ,T<: Integer }
252
+ s = Int8 (sign (numerator (x)))
253
+ a = abs (x)
254
+
255
+ num = numerator (a)
256
+ den = denominator (a)
257
+
258
+ # Handle special cases
259
+ num_is_zero = iszero (num)
260
+ den_is_zero = iszero (den)
261
+ if den_is_zero
262
+ num_is_zero && return to_float (NaN ) # 0/0
263
+ return to_float (s * Inf ) # n/0 = sign(n) * Inf
264
+ end
265
+ num_is_zero && return to_float (false ) # 0/n = 0
266
+
267
+ F = typeof (to_float (false ))
268
+ c = rational_to_float_components (
269
+ num, den, precision, T, RoundNearest, min_normal_number_exponent (F))
270
+ bw = bit_width (c. mantissa)
271
+
272
+ # TODO : `ldexp` could be replaced with a mere bit of bit twiddling
273
+ # in the case of `Float16`, `Float32`, `Float64`
274
+ ldexp (s * to_float (c. mantissa), c. exponent - bw + true ):: F
136
275
end
137
276
277
+ function rational_to_float_promote_type (:: Type{F} ,
278
+ :: Type{S} ) where {F<: AbstractFloat ,S<: Integer }
279
+ BigInt
280
+ end
281
+
282
+ function rational_to_float_promote_type (:: Type{F} ,
283
+ :: Type{S} ) where {F<: AbstractFloat ,S<: Unsigned }
284
+ rational_to_float_promote_type (F, widen (signed (S)))
285
+ end
286
+
287
+ # As an optimization, use a narrower type when possible.
288
+ rational_to_float_promote_type (:: Type{Float16} , :: Type{<:Union{Int8,Int16}} ) = Int32
289
+ rational_to_float_promote_type (:: Type{Float32} , :: Type{<:Union{Int8,Int16}} ) = Int64
290
+ rational_to_float_promote_type (:: Type{Float64} , :: Type{<:Union{Int8,Int16}} ) = Int128
291
+ rational_to_float_promote_type (:: Type{<:Union{Float16,Float32}} , :: Type{Int32} ) = Int64
292
+ rational_to_float_promote_type (:: Type{Float64} , :: Type{Int32} ) = Int128
293
+ rational_to_float_promote_type (:: Type{<:Union{Float16,Float32,Float64}} , :: Type{Int64} ) = Int128
294
+
295
+ rational_to_float (:: Type{F} , x:: Rational{Bool} ) where {F<: AbstractFloat } = F (numerator (x)/ denominator (x))
296
+
297
+ function rational_to_float (:: Type{F} , x:: Rational{T} ) where {F<: AbstractFloat ,T<: Integer }
298
+ rational_to_float_impl (F, rational_to_float_promote_type (F, T), x, precision (F)):: F
299
+ end
300
+
301
+ function (:: Type{F} )(x:: Rational ) where {F<: AbstractFloat }
302
+ y = unsafe_rational (numerator (x), denominator (x))
303
+ rational_to_float (F, y):: F
304
+ end
305
+
306
+ AbstractFloat (x:: Q ) where {Q<: Rational } = float (Q)(x):: AbstractFloat
307
+
138
308
function Rational {T} (x:: AbstractFloat ) where T<: Integer
139
309
r = rationalize (T, x, tol= 0 )
140
310
x == convert (typeof (x), r) || throw (InexactError (:Rational , Rational{T}, x))
0 commit comments