@@ -38,16 +38,50 @@ truncbits(x, nb) = x
3838
3939# # Dekker arithmetic
4040
41+ # Reference for double-word floating-point arithmetic:
42+ #
43+ # Tight and Rigorous Error Bounds for Basic Building Blocks of
44+ # Double-Word Arithmetic
45+ #
46+ # ACM Transactions on Mathematical Software, Vol. 44, No. 2, Article
47+ # 15res. Publication date: October 2017
48+ #
49+ # Mioara Joldes, Jean-Michel Muller, and Valentina Popescu
50+ #
51+ # https://doi.org/10.1145/3121432
52+
53+ function fast_two_sum (big:: T , little:: T ) where {T<: AbstractFloat }
54+ h = big + little
55+ (h, (big - h) + little)
56+ end
57+
4158"""
4259 hi, lo = canonicalize2(big, little)
4360
4461Generate a representation where all the nonzero bits in `hi` are more
4562significant than any of the nonzero bits in `lo`. `big` must be larger
4663in absolute value than `little`.
4764"""
48- function canonicalize2 (big, little)
49- h = big+ little
50- h, (big - h) + little
65+ canonicalize2 (big:: T , little:: T ) where {T<: AbstractFloat } = fast_two_sum (big, little)
66+ canonicalize2 (big:: T , little:: T ) where {T} = (big + little, zero (big))
67+
68+ # `add12_branchful` and `add12_branchless` are equivalent, known to
69+ # produce the same results as long as there's no overflow and no
70+ # underflow
71+
72+ function add12_branchful (x:: T , y:: T ) where {T<: AbstractFloat }
73+ x, y = ifelse (abs (y) > abs (x), (y, x), (x, y))
74+ fast_two_sum (x, y)
75+ end
76+
77+ function add12_branchless (a:: T , b:: T ) where {T<: AbstractFloat }
78+ s = a + b
79+ a_ = s - b
80+ b_ = s - a_
81+ δa = a - a_
82+ δb = b - b_
83+ t = δa + δb
84+ (s, t)
5185end
5286
5387"""
@@ -80,10 +114,8 @@ julia> big(hi) + big(lo)
80114`lo` differs from 1.0e-19 because `hi` is not exactly equal to
81115the first 16 decimal digits of the answer.
82116"""
83- function add12 (x:: T , y:: T ) where {T}
84- x, y = ifelse (abs (y) > abs (x), (y, x), (x, y))
85- canonicalize2 (x, y)
86- end
117+ add12 (x:: T , y:: T ) where {T<: AbstractFloat } = add12_branchless (x, y)
118+ add12 (x:: T , y:: T ) where {T} = (x + y, zero (x))
87119add12 (x, y) = add12 (promote (x, y)... )
88120
89121"""
118150mul12 (x:: T , y:: T ) where {T} = (p = x * y; (p, zero (p)))
119151mul12 (x, y) = mul12 (promote (x, y)... )
120152
153+ # "DWPlusFP" AKA "Algorithm 4" from Joldes, Muller, Popescu
154+ function dw_plus_fp (x:: NTuple{2,T} , y:: T ) where {T<: AbstractFloat }
155+ (x_hi, x_lo) = x
156+ (s_hi, s_lo) = add12 (x_hi, y)
157+ v = x_lo + s_lo
158+ fast_two_sum (s_hi, v)
159+ end
160+
161+ # "AccurateDWPlusDW" AKA "Algorithm 6" from Joldes, Muller, Popescu
162+ function dw_plus_dw (x:: NTuple{2,T} , y:: NTuple{2,T} ) where {T<: AbstractFloat }
163+ (x_hi, x_lo) = x
164+ (y_hi, y_lo) = y
165+ (s_hi, s_lo) = add12 (x_hi, y_hi)
166+ (t_hi, t_lo) = add12 (x_lo, y_lo)
167+ c = s_lo + t_hi
168+ (v_hi, v_lo) = fast_two_sum (s_hi, c)
169+ w = t_lo + v_lo
170+ fast_two_sum (v_hi, w)
171+ end
172+
173+ # "DWTimesFP1" AKA "Algorithm 7" from Joldes, Muller, Popescu
174+ function dw_times_fp (x:: NTuple{2,T} , y:: T ) where {T<: AbstractFloat }
175+ (x_hi, x_lo) = x
176+ (c_hi, c_l1) = mul12 (x_hi, y)
177+ c_l2 = x_lo * y
178+ (t_hi, t_l1) = fast_two_sum (c_hi, c_l2)
179+ t_l2 = t_l1 + c_l1
180+ fast_two_sum (t_hi, t_l2)
181+ end
182+
183+ # "DWTimesDW3" AKA "Algorithm 12" from Joldes, Muller, Popescu
184+ function dw_times_dw (x:: NTuple{2,T} , y:: NTuple{2,T} ) where {T<: AbstractFloat }
185+ (x_hi, x_lo) = x
186+ (y_hi, y_lo) = y
187+ (c_hi, c_l1) = mul12 (x_hi, y_hi)
188+ t_l0 = x_lo * y_lo
189+ t_l1 = fma (x_hi, y_lo, t_l0)
190+ c_l2 = fma (x_lo, y_hi, t_l1)
191+ c_l3 = c_l1 + c_l2
192+ fast_two_sum (c_hi, c_l3)
193+ end
194+
195+ # "DWDivFP3" AKA "Algorithm 15" from Joldes, Muller, Popescu
196+ function dw_div_fp (x:: NTuple{2,T} , y:: T ) where {T<: AbstractFloat }
197+ (x_hi, x_lo) = x
198+ hi = x_hi / y
199+ π = Base. Math. two_mul (hi, y)
200+ δ_hi = x_hi - first (π) # exact operation
201+ δ_t = δ_hi - last (π) # exact operation
202+ δ = δ_t + x_lo
203+ lo = δ / y
204+ fast_two_sum (hi, lo)
205+ end
206+
207+ # "DWDivDW3" AKA "Algorithm 18" from Joldes, Muller, Popescu
208+ function dw_div_dw (x:: NTuple{2,T} , y:: NTuple{2,T} ) where {T<: AbstractFloat }
209+ (x_hi, x_lo) = x
210+ (y_hi, y_lo) = y
211+ t_hi = 1 / y_hi
212+ r_hi = fma (- y_hi, t_hi, true ) # exact operation
213+ r_lo = - y_lo * t_hi
214+ e = fast_two_sum (r_hi, r_lo)
215+ δ = dw_times_fp (e, t_hi)
216+ m = dw_plus_fp (δ, t_hi)
217+ dw_times_dw (x, m)
218+ end
219+
220+ div12_kernel (x:: T , y:: T ) where {T<: AbstractFloat } =
221+ dw_div_fp ((x, zero (x)), y)
222+
121223"""
122224 zhi, zlo = div12(x, y)
123225
@@ -149,7 +251,7 @@ function div12(x::T, y::T) where {T<:AbstractFloat}
149251 xs, xe = frexp (x)
150252 ys, ye = frexp (y)
151253 r = xs / ys
152- rh, rl = canonicalize2 (r, - fma (r, ys, - xs) / ys)
254+ rh, rl = div12_kernel (xs, ys)
153255 ifelse (iszero (r) | ! isfinite (r), (r, r), (ldexp (rh, xe- ye), ldexp (rl, xe- ye)))
154256end
155257div12 (x:: T , y:: T ) where {T} = (p = x / y; (p, zero (p)))
@@ -198,6 +300,8 @@ struct TwicePrecision{T}
198300 lo:: T # least significant bits
199301end
200302
303+ (:: Type{<:Tuple} )(t:: TwicePrecision ) = (t. hi, t. lo)
304+
201305TwicePrecision {T} (x:: T ) where {T} = TwicePrecision {T} (x, zero (T))
202306
203307function TwicePrecision {T} (x) where {T}
@@ -288,23 +392,23 @@ end
288392
289393# Arithmetic
290394
291- function + (x:: TwicePrecision , y:: Number )
292- s_hi, s_lo = add12 (x. hi, y)
293- TwicePrecision (canonicalize2 (s_hi, s_lo+ x. lo)... )
294- end
395+ + (x:: TwicePrecision{T} , y:: T ) where {T<: AbstractFloat } =
396+ TwicePrecision {T} (dw_plus_fp (Tuple (x), y)... )
397+
295398+ (x:: Number , y:: TwicePrecision ) = y+ x
296399
297- function + (x:: TwicePrecision{T} , y:: TwicePrecision{T} ) where T
298- r = x. hi + y. hi
299- s = abs (x. hi) > abs (y. hi) ? (((x. hi - r) + y. hi) + y. lo) + x. lo : (((y. hi - r) + x. hi) + x. lo) + y. lo
300- TwicePrecision (canonicalize2 (r, s)... )
301- end
400+ + (x:: TwicePrecision{T} , y:: TwicePrecision{T} ) where {T<: AbstractFloat } =
401+ TwicePrecision {T} (dw_plus_dw (Tuple (x), Tuple (y))... )
402+
302403+ (x:: TwicePrecision , y:: TwicePrecision ) = + (promote (x, y)... )
303404
304405- (x:: TwicePrecision , y:: TwicePrecision ) = x + (- y)
305406- (x:: TwicePrecision , y:: Number ) = x + (- y)
306407- (x:: Number , y:: TwicePrecision ) = x + (- y)
307408
409+ * (x:: TwicePrecision{T} , y:: T ) where {T<: AbstractFloat } =
410+ TwicePrecision {T} (dw_times_fp (Tuple (x), y)... )
411+
308412function * (x:: TwicePrecision , v:: Number )
309413 v == 0 && return TwicePrecision (x. hi* v, x. lo* v)
310414 x * TwicePrecision (oftype (x. hi* v, v))
@@ -317,23 +421,31 @@ function *(x::TwicePrecision{<:IEEEFloat}, v::Integer)
317421end
318422* (v:: Number , x:: TwicePrecision ) = x* v
319423
320- function * (x:: TwicePrecision{T} , y:: TwicePrecision{T} ) where {T}
321- zh, zl = mul12 (x. hi, y. hi)
322- ret = TwicePrecision {T} (canonicalize2 (zh, (x. hi * y. lo + x. lo * y. hi) + zl)... )
323- ifelse (iszero (zh) | ! isfinite (zh), TwicePrecision {T} (zh, zh), ret)
324- end
424+ * (x:: TwicePrecision{T} , y:: TwicePrecision{T} ) where {T<: AbstractFloat } =
425+ let zh = x. hi * y. hi
426+ ifelse (
427+ ! isfinite (zh),
428+ TwicePrecision {T} (zh, zh),
429+ TwicePrecision {T} (dw_times_dw (Tuple (x), Tuple (y))... ),
430+ )
431+ end
432+
325433* (x:: TwicePrecision , y:: TwicePrecision ) = * (promote (x, y)... )
326434
435+ / (x:: TwicePrecision{T} , y:: T ) where {T<: AbstractFloat } =
436+ TwicePrecision {T} (dw_div_fp (Tuple (x), y)... )
437+
327438function / (x:: TwicePrecision , v:: Number )
328439 x / TwicePrecision (oftype (x. hi/ v, v))
329440end
330441
331- function / (x:: TwicePrecision , y:: TwicePrecision )
442+ function / (x:: TwicePrecision{T} , y:: TwicePrecision{T} ) where {T <: AbstractFloat }
332443 hi = x. hi / y. hi
333- uh, ul = mul12 (hi, y. hi)
334- lo = ((((x. hi - uh) - ul) + x. lo) - hi* y. lo)/ y. hi
335- ret = TwicePrecision (canonicalize2 (hi, lo)... )
336- ifelse (iszero (hi) | ! isfinite (hi), TwicePrecision (hi, hi), ret)
444+ ifelse (
445+ ! isfinite (hi),
446+ TwicePrecision (hi, hi),
447+ TwicePrecision {T} (dw_div_dw (Tuple (x), Tuple (y))... ),
448+ )
337449end
338450
339451# # StepRangeLen
0 commit comments