@@ -17,11 +17,15 @@ import
17
17
cbrt, typemax, typemin, unsafe_trunc, floatmin, floatmax, rounding,
18
18
setrounding, maxintfloat, widen, significand, frexp, tryparse, iszero,
19
19
isone, big, _string_n, decompose, minmax,
20
- sinpi, cospi, sincospi, tanpi, sind, cosd, tand, asind, acosd, atand
20
+ sinpi, cospi, sincospi, tanpi, sind, cosd, tand, asind, acosd, atand,
21
+ uinttype, exponent_max, exponent_min, ieee754_representation, significand_mask,
22
+ RawBigIntRoundingIncrementHelper, truncated, RawBigInt
21
23
22
24
23
25
using . Base. Libc
24
- import .. Rounding: rounding_raw, setrounding_raw
26
+ import .. Rounding:
27
+ rounding_raw, setrounding_raw, rounds_to_nearest, rounds_away_from_zero,
28
+ tie_breaker_is_to_even, correct_rounding_requires_increment
25
29
26
30
import .. GMP: ClongMax, CulongMax, CdoubleMax, Limb, libgmp
27
31
@@ -89,6 +93,21 @@ function convert(::Type{RoundingMode}, r::MPFRRoundingMode)
89
93
end
90
94
end
91
95
96
# Report whether the MPFR rounding mode `m` is `MPFRRoundNearest`.
function rounds_to_nearest(m::MPFRRoundingMode)
    return m == MPFRRoundNearest
end
97
# Report whether rounding mode `m` rounds away from zero for a value whose sign bit is
# `sign_bit` (`true` for a negative value).
#
# `MPFRRoundToZero` never does, `MPFRRoundUp` does for non-negative values, and
# `MPFRRoundDown` does for negative values. Every other mode is handled as if it were
# `MPFRRoundFromZero`, so callers should rule out round-to-nearest beforehand.
function rounds_away_from_zero(m::MPFRRoundingMode, sign_bit::Bool)
    m == MPFRRoundToZero && return false
    m == MPFRRoundUp && return !sign_bit
    m == MPFRRoundDown && return sign_bit
    # Assuming `m == MPFRRoundFromZero`
    return true
end
109
# Always answers `true`, whichever MPFR rounding mode is passed.
function tie_breaker_is_to_even(::MPFRRoundingMode)
    return true
end
110
+
92
111
# Mutable cell holding the current MPFR rounding mode; it starts out as
# `MPFRRoundNearest`. It is read by `rounding_raw(BigFloat)`, written by
# `setrounding_raw(BigFloat, r)`, and supplies the default rounding argument of the
# `BigFloat` -> `Float16`/`Float32`/`Float64` conversions.
const ROUNDING_MODE = Ref {MPFRRoundingMode} (MPFRRoundNearest)
93
112
# Mutable cell of type `Ref{Clong}`, initialised to 256.
# NOTE(review): presumably the default `BigFloat` precision in bits — confirm against
# the code that reads it.
const DEFAULT_PRECISION = Ref {Clong} (256 )
94
113
@@ -136,6 +155,9 @@ mutable struct BigFloat <: AbstractFloat
136
155
end
137
156
end
138
157
158
# Number of `Limb`-sized words in the buffer `x._d`, i.e. the byte size of the buffer
# divided by the byte size of one limb. The rounding mode handed to `div` is not
# expected to matter here.
function significand_limb_count(x::BigFloat)
    buffer_bytes = sizeof(x._d)
    limb_bytes = sizeof(Limb)
    return div(buffer_bytes, limb_bytes, RoundToZero)
end
160
+
139
161
# Return the MPFR rounding mode currently stored in `ROUNDING_MODE`.
function rounding_raw(::Type{BigFloat})
    return ROUNDING_MODE[]
end
140
162
# Store `r` in `ROUNDING_MODE` and return `r`.
function setrounding_raw(::Type{BigFloat}, r::MPFRRoundingMode)
    return ROUNDING_MODE[] = r
end
141
163
@@ -386,35 +408,69 @@ function (::Type{T})(x::BigFloat) where T<:Integer
386
408
trunc (T,x)
387
409
end
388
410
389
- # # BigFloat -> AbstractFloat
390
- _cpynansgn (x:: AbstractFloat , y:: BigFloat ) = isnan (x) && signbit (x) != signbit (y) ? - x : x
391
-
392
- Float64 (x:: BigFloat , r:: MPFRRoundingMode = ROUNDING_MODE[]) =
393
- _cpynansgn (ccall ((:mpfr_get_d ,libmpfr), Float64, (Ref{BigFloat}, MPFRRoundingMode), x, r), x)
394
- Float64 (x:: BigFloat , r:: RoundingMode ) = Float64 (x, convert (MPFRRoundingMode, r))
395
-
396
- Float32 (x:: BigFloat , r:: MPFRRoundingMode = ROUNDING_MODE[]) =
397
- _cpynansgn (ccall ((:mpfr_get_flt ,libmpfr), Float32, (Ref{BigFloat}, MPFRRoundingMode), x, r), x)
398
- Float32 (x:: BigFloat , r:: RoundingMode ) = Float32 (x, convert (MPFRRoundingMode, r))
399
-
400
- function Float16 (x:: BigFloat ) :: Float16
401
- res = Float32 (x)
402
- resi = reinterpret (UInt32, res)
403
- if (resi& 0x7fffffff ) < 0x38800000 # if Float16(res) is subnormal
404
- # shift so that the mantissa lines up where it would for normal Float16
405
- shift = 113 - ((resi & 0x7f800000 )>> 23 )
406
- if shift< 23
407
- resi |= 0x0080_0000 # set implicit bit
408
- resi >>= shift
411
# Convert `x` to the IEEE 754 floating-point type `T` under rounding mode `rm`.
#
# The result is assembled as an unsigned-integer bit pattern of type `uinttype(T)` and
# then reinterpreted as `T`. `rm` may be any rounding-mode value for which
# `rounds_to_nearest`, `rounds_away_from_zero` and
# `correct_rounding_requires_increment` have applicable methods.
function to_ieee754(::Type{T}, x::BigFloat, rm) where {T<:AbstractFloat}
    # Classify the input.
    negative = signbit(x)
    x_is_zero = iszero(x)
    x_is_inf = isinf(x)
    x_is_nan = isnan(x)
    x_is_regular = !x_is_zero & !x_is_inf & !x_is_nan

    # Exponent bookkeeping relative to the target format.
    # NOTE(review): the `- 1` presumably translates MPFR's exponent convention into
    # the IEEE one — confirm.
    e = Int(x.exp) - 1
    p = precision(T)
    e_max = exponent_max(T)
    e_min = exponent_min(T)
    e_above_min = e - e_min
    normal = e_above_min ≥ 0

    # For a directed rounding mode exactly one of these flags is set, depending on
    # the sign of `x`; for round-to-nearest neither is.
    (directed_to_zero, directed_from_zero) = if rounds_to_nearest(rm)
        (false, false)
    else
        away = rounds_away_from_zero(rm, negative)
        (!away, away)
    end::NTuple{2,Bool}

    # Exponent above the largest one of `T`, or so far below the smallest one that
    # none of the `p` significand bits remain.
    overflows = e_max < e
    underflows = signbit(e_above_min + p)
    to_inf = x_is_regular & overflows & !directed_to_zero
    to_zero = x_is_regular & underflows & !directed_from_zero
    U = uinttype(T)

    # A regular input that collapses to neither infinity nor zero.
    finite_result = x_is_regular & !to_inf & !to_zero

    bits = if finite_result & !overflows
        # Significand: fewer than `p` bits are kept when the exponent lies below
        # `e_min`.
        limbs = RawBigInt(x.d, significand_limb_count(x))
        kept = max(p + min(e_above_min, 0), 0)::Int
        frac = truncated(U, limbs, kept) & significand_mask(T)

        # Decide whether the dropped bits require rounding up.
        helper = RawBigIntRoundingIncrementHelper(limbs, kept)
        bump = correct_rounding_requires_increment(helper, rm, negative)

        # Exponent field: `normal` contributes one for non-negative `e_above_min`.
        biased = max(e_above_min, 0) + normal

        # The increment is added to the packed representation as a whole.
        ieee754_representation(T, negative, biased, frac) + bump
    elseif finite_result
        # Exponent too large, but the rounding mode points toward zero.
        # NOTE(review): `Val(:omega)` presumably selects the largest finite
        # magnitude — confirm.
        ieee754_representation(T, negative, Val(:omega))
    elseif x_is_zero | to_zero
        ieee754_representation(T, negative, Val(:zero))
    elseif x_is_inf | to_inf
        ieee754_representation(T, negative, Val(:inf))
    else
        ieee754_representation(T, negative, Val(:nan))
    end::U

    return reinterpret(T, bits)
end
417
466
467
# `BigFloat` -> `Float16`/`Float32`/`Float64` conversions, all routed through
# `to_ieee754`. When the mode is an `MPFRRoundingMode` it defaults to the value
# currently stored in `ROUNDING_MODE`; a `RoundingMode` has to be passed explicitly.
for Tf in (:Float16, :Float32, :Float64)
    @eval begin
        $Tf(x::BigFloat, r::MPFRRoundingMode=ROUNDING_MODE[]) = to_ieee754($Tf, x, r)
        $Tf(x::BigFloat, r::RoundingMode) = to_ieee754($Tf, x, r)
    end
end
473
+
418
474
# Promotion rules: `BigFloat` combined with any `Real`, and `BigInt` combined with any
# `AbstractFloat`, both promote to `BigFloat`.
function promote_rule(::Type{BigFloat}, ::Type{<:Real})
    return BigFloat
end
function promote_rule(::Type{BigInt}, ::Type{<:AbstractFloat})
    return BigFloat
end
function promote_rule(::Type{BigFloat}, ::Type{<:AbstractFloat})
    return BigFloat
end
0 commit comments