@@ -17,11 +17,15 @@ import
17
17
cbrt, typemax, typemin, unsafe_trunc, floatmin, floatmax, rounding,
18
18
setrounding, maxintfloat, widen, significand, frexp, tryparse, iszero,
19
19
isone, big, _string_n, decompose, minmax,
20
- sinpi, cospi, sincospi, tanpi, sind, cosd, tand, asind, acosd, atand
20
+ sinpi, cospi, sincospi, tanpi, sind, cosd, tand, asind, acosd, atand,
21
+ uinttype, exponent_max, exponent_min, ieee754_representation, significand_mask,
22
+ RawBigIntRoundingIncrementHelper, truncated, RawBigInt
21
23
22
24
23
25
using . Base. Libc
24
- import .. Rounding: rounding_raw, setrounding_raw
26
+ import .. Rounding:
27
+ rounding_raw, setrounding_raw, rounds_to_nearest, rounds_away_from_zero,
28
+ tie_breaker_is_to_even, correct_rounding_requires_increment
25
29
26
30
import .. GMP: ClongMax, CulongMax, CdoubleMax, Limb, libgmp
27
31
@@ -89,6 +93,21 @@ function convert(::Type{RoundingMode}, r::MPFRRoundingMode)
89
93
end
90
94
end
91
95
96
+ rounds_to_nearest (m:: MPFRRoundingMode ) = m == MPFRRoundNearest
97
+ function rounds_away_from_zero (m:: MPFRRoundingMode , sign_bit:: Bool )
98
+ if m == MPFRRoundToZero
99
+ false
100
+ elseif m == MPFRRoundUp
101
+ ! sign_bit
102
+ elseif m == MPFRRoundDown
103
+ sign_bit
104
+ else
105
+ # Assuming `m == MPFRRoundFromZero`
106
+ true
107
+ end
108
+ end
109
+ tie_breaker_is_to_even (:: MPFRRoundingMode ) = true
110
+
92
111
const ROUNDING_MODE = Ref {MPFRRoundingMode} (MPFRRoundNearest)
93
112
const DEFAULT_PRECISION = Ref {Clong} (256 )
94
113
@@ -130,6 +149,9 @@ mutable struct BigFloat <: AbstractFloat
130
149
end
131
150
end
132
151
152
+ # The rounding mode here shouldn't matter.
153
+ significand_limb_count (x:: BigFloat ) = div (sizeof (x. _d), sizeof (Limb), RoundToZero)
154
+
133
155
rounding_raw (:: Type{BigFloat} ) = ROUNDING_MODE[]
134
156
setrounding_raw (:: Type{BigFloat} , r:: MPFRRoundingMode ) = ROUNDING_MODE[]= r
135
157
@@ -380,35 +402,69 @@ function (::Type{T})(x::BigFloat) where T<:Integer
380
402
trunc (T,x)
381
403
end
382
404
383
- # # BigFloat -> AbstractFloat
384
- _cpynansgn (x:: AbstractFloat , y:: BigFloat ) = isnan (x) && signbit (x) != signbit (y) ? - x : x
385
-
386
- Float64 (x:: BigFloat , r:: MPFRRoundingMode = ROUNDING_MODE[]) =
387
- _cpynansgn (ccall ((:mpfr_get_d ,libmpfr), Float64, (Ref{BigFloat}, MPFRRoundingMode), x, r), x)
388
- Float64 (x:: BigFloat , r:: RoundingMode ) = Float64 (x, convert (MPFRRoundingMode, r))
389
-
390
- Float32 (x:: BigFloat , r:: MPFRRoundingMode = ROUNDING_MODE[]) =
391
- _cpynansgn (ccall ((:mpfr_get_flt ,libmpfr), Float32, (Ref{BigFloat}, MPFRRoundingMode), x, r), x)
392
- Float32 (x:: BigFloat , r:: RoundingMode ) = Float32 (x, convert (MPFRRoundingMode, r))
393
-
394
- function Float16 (x:: BigFloat ) :: Float16
395
- res = Float32 (x)
396
- resi = reinterpret (UInt32, res)
397
- if (resi& 0x7fffffff ) < 0x38800000 # if Float16(res) is subnormal
398
- # shift so that the mantissa lines up where it would for normal Float16
399
- shift = 113 - ((resi & 0x7f800000 )>> 23 )
400
- if shift< 23
401
- resi |= 0x0080_0000 # set implicit bit
402
- resi >>= shift
405
+ function to_ieee754 (:: Type{T} , x:: BigFloat , rm) where {T<: AbstractFloat }
406
+ sb = signbit (x)
407
+ is_zero = iszero (x)
408
+ is_inf = isinf (x)
409
+ is_nan = isnan (x)
410
+ is_regular = ! is_zero & ! is_inf & ! is_nan
411
+ ieee_exp = Int (x. exp) - 1
412
+ ieee_precision = precision (T)
413
+ ieee_exp_max = exponent_max (T)
414
+ ieee_exp_min = exponent_min (T)
415
+ exp_diff = ieee_exp - ieee_exp_min
416
+ is_normal = 0 ≤ exp_diff
417
+ (rm_is_to_zero, rm_is_from_zero) = if rounds_to_nearest (rm)
418
+ (false , false )
419
+ else
420
+ let from = rounds_away_from_zero (rm, sb)
421
+ (! from, from)
403
422
end
404
- end
405
- if (resi & 0x1fff == 0x1000 ) # if we are halfway between 2 Float16 values
406
- # adjust the value by 1 ULP in the direction that will make Float16(res) give the right answer
407
- res = nextfloat (res, cmp (x, res))
408
- end
409
- return res
423
+ end :: NTuple{2,Bool}
424
+ exp_is_huge_p = ieee_exp_max < ieee_exp
425
+ exp_is_huge_n = signbit (exp_diff + ieee_precision)
426
+ rounds_to_inf = is_regular & exp_is_huge_p & ! rm_is_to_zero
427
+ rounds_to_zero = is_regular & exp_is_huge_n & ! rm_is_from_zero
428
+ U = uinttype (T)
429
+
430
+ ret_u = if is_regular & ! rounds_to_inf & ! rounds_to_zero
431
+ if ! exp_is_huge_p
432
+ # significand
433
+ v = RawBigInt (x. d, significand_limb_count (x))
434
+ len = max (ieee_precision + min (exp_diff, 0 ), 0 ):: Int
435
+ signif = truncated (U, v, len) & significand_mask (T)
436
+
437
+ # round up if necessary
438
+ rh = RawBigIntRoundingIncrementHelper (v, len)
439
+ incr = correct_rounding_requires_increment (rh, rm, sb)
440
+
441
+ # exponent
442
+ exp_field = max (exp_diff, 0 ) + is_normal
443
+
444
+ ieee754_representation (T, sb, exp_field, signif) + incr
445
+ else
446
+ ieee754_representation (T, sb, Val (:omega ))
447
+ end
448
+ else
449
+ if is_zero | rounds_to_zero
450
+ ieee754_representation (T, sb, Val (:zero ))
451
+ elseif is_inf | rounds_to_inf
452
+ ieee754_representation (T, sb, Val (:inf ))
453
+ else
454
+ ieee754_representation (T, sb, Val (:nan ))
455
+ end
456
+ end :: U
457
+
458
+ reinterpret (T, ret_u)
410
459
end
411
460
461
+ Float16 (x:: BigFloat , r:: MPFRRoundingMode = ROUNDING_MODE[]) = to_ieee754 (Float16, x, r)
462
+ Float32 (x:: BigFloat , r:: MPFRRoundingMode = ROUNDING_MODE[]) = to_ieee754 (Float32, x, r)
463
+ Float64 (x:: BigFloat , r:: MPFRRoundingMode = ROUNDING_MODE[]) = to_ieee754 (Float64, x, r)
464
+ Float16 (x:: BigFloat , r:: RoundingMode ) = to_ieee754 (Float16, x, r)
465
+ Float32 (x:: BigFloat , r:: RoundingMode ) = to_ieee754 (Float32, x, r)
466
+ Float64 (x:: BigFloat , r:: RoundingMode ) = to_ieee754 (Float64, x, r)
467
+
412
468
promote_rule (:: Type{BigFloat} , :: Type{<:Real} ) = BigFloat
413
469
promote_rule (:: Type{BigInt} , :: Type{<:AbstractFloat} ) = BigFloat
414
470
promote_rule (:: Type{BigFloat} , :: Type{<:AbstractFloat} ) = BigFloat
0 commit comments