diff --git a/LICENSE.md b/LICENSE.md index 8904e41b74eb0..b906f655a5817 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -73,7 +73,8 @@ The following components of Julia's standard library have separate licenses: - base/grisu/* (see [double-conversion](https://github.com/google/double-conversion/blob/master/LICENSE)) - base/sparse/umfpack.jl (see [SUITESPARSE](http://faculty.cse.tamu.edu/davis/suitesparse.html)) - base/sparse/cholmod.jl (see [SUITESPARSE](http://faculty.cse.tamu.edu/davis/suitesparse.html)) -- base/special/exp.jl (see [FREEBSD MSUN](https://github.com/freebsd/freebsd) [FreeBSD/2-clause BSD/Simplified BSD License]) +- base/special/exp.jl (see [FDLIBM](http://www.netlib.org/fdlibm/e_exp.c) [Freely distributable with preserved copyright notice]) +- base/special/rem_pio2.jl (see [FDLIBM](http://www.netlib.org/fdlibm/e_rem_pio2.c) [Freely distributable with preserved copyright notice]) Julia's build process uses the following external tools: diff --git a/base/math.jl b/base/math.jl index 88bbf841d8da5..64cd5137ae155 100644 --- a/base/math.jl +++ b/base/math.jl @@ -781,25 +781,6 @@ function add22condh(xh::Float64, xl::Float64, yh::Float64, yl::Float64) return zh end -@inline function ieee754_rem_pio2(x::Float64) - # rem_pio2 essentially computes x mod pi/2 (ie within a quarter circle) - # and returns the result as - # y between + and - pi/4 (for maximal accuracy (as the sign bit is exploited)), and - # n, where n specifies the integer part of the division, or, at any rate, - # in which quadrant we are. - # The invariant fulfilled by the returned values seems to be - # x = y + n*pi/2 (where y = y1+y2 is a double-double and y2 is the "tail" of y). - # Note: for very large x (thus n), the invariant might hold only modulo 2pi - # (in other words, n might be off by a multiple of 4, or a multiple of 100) - - # this is just wrapping up - # https://github.com/JuliaLang/openspecfun/blob/master/rem_pio2/e_rem_pio2.c - - y = Ref{NTuple{2,Float64}}() - n = ccall((:__ieee754_rem_pio2, openspecfun), Cint, (Float64, Ptr{Void}), x, y) - return (n, y[]) -end - # multiples of pi/2, as double-double (ie with "tail") const pi1o2_h = 1.5707963267948966 # convert(Float64, pi * BigFloat(1/2)) const pi1o2_l = 6.123233995736766e-17 # convert(Float64, pi * BigFloat(1/2) - pi1o2_h) @@ -847,23 +828,23 @@ function rem2pi end function rem2pi(x::Float64, ::RoundingMode{:Nearest}) abs(x) < pi && return x - (n,y) = ieee754_rem_pio2(x) + n,y = rem_pio2_kernel(x) if iseven(n) if n & 2 == 2 # n % 4 == 2: add/subtract pi - if y[1] <= 0 - return add22condh(y[1],y[2],pi2o2_h,pi2o2_l) + if y.hi <= 0 + return add22condh(y.hi,y.lo,pi2o2_h,pi2o2_l) else - return add22condh(y[1],y[2],-pi2o2_h,-pi2o2_l) + return add22condh(y.hi,y.lo,-pi2o2_h,-pi2o2_l) end else # n % 4 == 0: add 0 - return y[1] + return y.hi+y.lo end else if n & 2 == 2 # n % 4 == 3: subtract pi/2 - return add22condh(y[1],y[2],-pi1o2_h,-pi1o2_l) + return add22condh(y.hi,y.lo,-pi1o2_h,-pi1o2_l) else # n % 4 == 1: add pi/2 - return add22condh(y[1],y[2],pi1o2_h,pi1o2_l) + return add22condh(y.hi,y.lo,pi1o2_h,pi1o2_l) end end end @@ -871,23 +852,23 @@ function rem2pi(x::Float64, ::RoundingMode{:ToZero}) ax = abs(x) ax <= 2*Float64(pi,RoundDown) && return x - (n,y) = ieee754_rem_pio2(ax) + n,y = rem_pio2_kernel(x) if iseven(n) if n & 2 == 2 # n % 4 == 2: add pi - z = add22condh(y[1],y[2],pi2o2_h,pi2o2_l) + z = add22condh(y.hi,y.lo,pi2o2_h,pi2o2_l) else # n % 4 == 0: add 0 or 2pi - if y[1] > 0 - z = y[1] + if y.hi > 0 + z = y.hi+y.lo else # negative: add 2pi - z = add22condh(y[1],y[2],pi4o2_h,pi4o2_l) + z = add22condh(y.hi,y.lo,pi4o2_h,pi4o2_l) end end else if n & 2 == 2 # n % 4 == 3: add 3pi/2 - z = add22condh(y[1],y[2],pi3o2_h,pi3o2_l) + z = add22condh(y.hi,y.lo,pi3o2_h,pi3o2_l) else # n % 4 == 1: add pi/2 - z = add22condh(y[1],y[2],pi1o2_h,pi1o2_l) + z = add22condh(y.hi,y.lo,pi1o2_h,pi1o2_l) end end copysign(z,x) @@ -901,23 +882,23 @@ function rem2pi(x::Float64, ::RoundingMode{:Down}) end end - (n,y) = ieee754_rem_pio2(x) + n,y = rem_pio2_kernel(x) if iseven(n) if n & 2 == 2 # n % 4 == 2: add pi - return add22condh(y[1],y[2],pi2o2_h,pi2o2_l) + return add22condh(y.hi,y.lo,pi2o2_h,pi2o2_l) else # n % 4 == 0: add 0 or 2pi - if y[1] > 0 - return y[1] + if y.hi > 0 + return y.hi+y.lo else # negative: add 2pi - return add22condh(y[1],y[2],pi4o2_h,pi4o2_l) + return add22condh(y.hi,y.lo,pi4o2_h,pi4o2_l) end end else if n & 2 == 2 # n % 4 == 3: add 3pi/2 - return add22condh(y[1],y[2],pi3o2_h,pi3o2_l) + return add22condh(y.hi,y.lo,pi3o2_h,pi3o2_l) else # n % 4 == 1: add pi/2 - return add22condh(y[1],y[2],pi1o2_h,pi1o2_l) + return add22condh(y.hi,y.lo,pi1o2_h,pi1o2_l) end end end @@ -930,23 +911,23 @@ function rem2pi(x::Float64, ::RoundingMode{:Up}) end end - (n,y) = ieee754_rem_pio2(x) + n,y = rem_pio2_kernel(x) if iseven(n) if n & 2 == 2 # n % 4 == 2: sub pi - return add22condh(y[1],y[2],-pi2o2_h,-pi2o2_l) + return add22condh(y.hi,y.lo,-pi2o2_h,-pi2o2_l) else # n % 4 == 0: sub 0 or 2pi - if y[1] < 0 - return y[1] + if y.hi < 0 + return y.hi+y.lo else # positive: sub 2pi - return add22condh(y[1],y[2],-pi4o2_h,-pi4o2_l) + return add22condh(y.hi,y.lo,-pi4o2_h,-pi4o2_l) end end else if n & 2 == 2 # n % 4 == 3: sub pi/2 - return add22condh(y[1],y[2],-pi1o2_h,-pi1o2_l) + return add22condh(y.hi,y.lo,-pi1o2_h,-pi1o2_l) else # n % 4 == 1: sub 3pi/2 - return add22condh(y[1],y[2],-pi3o2_h,-pi3o2_l) + return add22condh(y.hi,y.lo,-pi3o2_h,-pi3o2_l) end end end @@ -1023,6 +1004,7 @@ include(joinpath("special", "exp.jl")) include(joinpath("special", "exp10.jl")) include(joinpath("special", "trig.jl")) include(joinpath("special", "gamma.jl")) +include(joinpath("special", "rem_pio2.jl")) module JuliaLibm include(joinpath("special", "log.jl")) diff --git a/base/special/exp.jl b/base/special/exp.jl index 72da28644c2c4..e49bfb5ed942b 100644 --- a/base/special/exp.jl +++ b/base/special/exp.jl @@ -1,4 +1,4 @@ -# Based on FreeBSD lib/msun/src/e_exp.c +# Based on FDLIBM http://www.netlib.org/fdlibm/e_exp.c # which is made available under the following licence ## Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved. Permission diff --git a/base/special/rem_pio2.jl b/base/special/rem_pio2.jl new file mode 100644 index 0000000000000..9d8f830c03f92 --- /dev/null +++ b/base/special/rem_pio2.jl @@ -0,0 +1,294 @@ +# This file is a part of Julia. Except for the rem_pio2_kernel, and +# cody_waite_* methods (see below) license is MIT: https://julialang.org/license + +# rem_pio2_kernel and cody_waite_* methods are heavily based on FDLIBM code: +# __ieee754_rem_pio2, that is made available under the following licence: + +## Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. +## +## Developed at SunPro, a Sun Microsystems, Inc. business. +## Permission to use, copy, modify, and distribute this +## software is freely granted, provided that this notice +## is preserved. + +# Bits of 1/2π +# 1/2π == sum(x / 0x1p64^i for i,x = enumerate(INV_2PI)) +# Can be obtained by: +# +# setprecision(BigFloat, 4096) +# I = 0.5/big(pi) +# for i = 1:19 +# I *= 0x1p64 +# k = trunc(UInt64, I) +# @printf "0x%016x,\n" k +# I -= k +# end +const INV_2PI = UInt64[ + 0x28be_60db_9391_054a, + 0x7f09_d5f4_7d4d_3770, + 0x36d8_a566_4f10_e410, + 0x7f94_58ea_f7ae_f158, + 0x6dc9_1b8e_9093_74b8, + 0x0192_4bba_8274_6487, + 0x3f87_7ac7_2c4a_69cf, + 0xba20_8d7d_4bae_d121, + 0x3a67_1c09_ad17_df90, + 0x4e64_758e_60d4_ce7d, + 0x2721_17e2_ef7e_4a0e, + 0xc7fe_25ff_f781_6603, + 0xfbcb_c462_d682_9b47, + 0xdb4d_9fb3_c9f2_c26d, + 0xd3d1_8fd9_a797_fa8b, + 0x5d49_eeb1_faf9_7c5e, + 0xcf41_ce7d_e294_a4ba, + 0x9afe_d7ec_47e3_5742, + 0x1580_cc11_bf1e_daea] + +""" + highword(x) + +Return the high word of `x` as a `UInt32`. +""" +@inline highword(x::UInt64) = unsafe_trunc(UInt32,x >> 32) +@inline highword(x::Float64) = highword(reinterpret(UInt64, x)) + +""" + poshighword(x) + +Return positive part of the high word of `x` as a `UInt32`. +""" +@inline poshighword(x::UInt64) = unsafe_trunc(UInt32,x >> 32)&0x7fffffff +@inline poshighword(x::Float64) = poshighword(reinterpret(UInt64, x)) + +@inline function cody_waite_2c_pio2(x::Float64, fn, n) + pio2_1 = 1.57079632673412561417e+00 + pio2_1t = 6.07710050650619224932e-11 + + z = muladd(-fn, pio2_1, x) # x - fn*pio2_1 + y1 = muladd(-fn, pio2_1t, z) # z - fn*pio2_1t + y2 = muladd(-fn, pio2_1t, (z - y1)) # (z - y1) - fn*pio2_1t + n, DoubleFloat64(y1, y2) +end + +@inline function cody_waite_ext_pio2(x::Float64, xhp) + pio2_1 = 1.57079632673412561417e+00 + pio2_1t = 6.07710050650619224932e-11 + pio2_2 = 6.07710050630396597660e-11 + pio2_2t = 2.02226624879595063154e-21 + pio2_3 = 2.02226624871116645580e-21 + pio2_3t = 8.47842766036889956997e-32 + + fn = round(x*(2/pi)) # round to integer + # on older systems, the above could be faster with + # rf = 1.5/eps(Float64) + # fn = (x*(2/pi)+rf)-rf + + r = muladd(-fn, pio2_1, x) # x - fn*pio2_1 + w = fn*pio2_1t # 1st round good to 85 bit + j = xhp>>20 + y1 = r-w + high = highword(y1) + i = j-((high>>20)&0x7ff) + if i>16 # 2nd iteration needed, good to 118 + t = r + w = fn*pio2_2 + r = t-w + w = muladd(fn, pio2_2t,-((t-r)-w)) + y1 = r-w + high = highword(y1) + i = j-((high>>20)&0x7ff) + if i>49 # 3rd iteration need, 151 bits acc + t = r # will cover all possible cases + w = fn*pio2_3 + r = t-w + w = muladd(fn, pio2_3t, -((t-r)-w)) + y1 = r-w + end + end + y2 = (r-y1)-w + return unsafe_trunc(Int, fn), DoubleFloat64(y1, y2) +end + +""" + fromfraction(f::Int128) + +Compute a tuple of values `(z1,z2)` such that + ``z1 + z2 == f / 2^128`` +and the significand of `z1` has 27 trailing zeros. +""" +function fromfraction(f::Int128) + if f == 0 + return (0.0,0.0) + end + + # 1. get leading term truncated to 26 bits + s = ((f < 0) % UInt64) << 63 # sign bit + x = abs(f) % UInt128 # magnitude + n1 = 128-leading_zeros(x) # ndigits0z(x,2) + m1 = ((x >> (n1-26)) % UInt64) << 27 + d1 = ((n1-128+1021) % UInt64) << 52 + z1 = reinterpret(Float64, s | (d1 + m1)) + + # 2. compute remaining term + x2 = (x - (UInt128(m1) << (n1-53))) + if x2 == 0 + return (z1, 0.0) + end + n2 = 128-leading_zeros(x2) + m2 = (x2 >> (n2-53)) % UInt64 + d2 = ((n2-128+1021) % UInt64) << 52 + z2 = reinterpret(Float64, s | (d2 + m2)) + return (z1,z2) +end + +function paynehanek(x::Float64) + # 1. Convert to form + # + # x = X * 2^k, + # + # where 2^(n-1) <= X < 2^n is an n-bit integer (n = 53, k = exponent(x)-52 ) + + # Computations are integer based, so reinterpret x as UInt64 + u = reinterpret(UInt64, x) + # Strip x of exponent bits and replace with ^1 + X = (u & significand_mask(Float64)) | (one(UInt64) << significand_bits(Float64)) + # Get k from formula above + # k = exponent(x)-52 + k = Int((u & exponent_mask(Float64)) >> significand_bits(Float64)) - exponent_bias(Float64) - significand_bits(Float64) + + # 2. Let α = 1/2π, then: + # + # α*x mod 1 ≡ [(α*2^k mod 1)*X] mod 1 + # + # so we can ignore the first k bits of α. Extract the next 3 64-bit parts of α. + # + # i.e. equivalent to + # setprecision(BigFloat,4096) + # α = 1/(2*big(pi)) + # A = mod(ldexp(α,k), 1) + # z1 = ldexp(A,64) + # a1 = trunc(UInt64, z1) + # z2 = ldexp(z1-a1, 64) + # a2 = trunc(UInt64, z2) + # z3 = ldexp(z2-a2, 64) + # a3 = trunc(UInt64, z3) + + # This is equivalent to + # idx, shift = divrem(k, 64) + # but divrem is slower. + idx = k >> 6 + + shift = k - (idx << 6) + if shift == 0 + @inbounds a1 = INV_2PI[idx+1] + @inbounds a2 = INV_2PI[idx+2] + @inbounds a3 = INV_2PI[idx+3] + else + # use shifts to extract the relevant 64 bit window + @inbounds a1 = (idx < 0 ? zero(UInt64) : INV_2PI[idx+1] << shift) | (INV_2PI[idx+2] >> (64 - shift)) + @inbounds a2 = (INV_2PI[idx+2] << shift) | (INV_2PI[idx+3] >> (64 - shift)) + @inbounds a3 = (INV_2PI[idx+3] << shift) | (INV_2PI[idx+4] >> (64 - shift)) + end + + # 3. Perform the multiplication: + # + # X. 0 0 0 + # × 0. a1 a2 a3 + # ============== + # _. w w _ + # + # (i.e. ignoring integer and lowest bit parts of result) + + w1 = UInt128(X*a1) << 64 # overflow becomes integer + w2 = widemul(X,a2) + w3 = widemul(X,a3) >> 64 + w = w1 + w2 + w3 # quotient fraction after division by 2π + + # adjust for sign of x + w = flipsign(w,x) + + # 4. convert to quadrant, quotient fraction after division by π/2: + q = (((w>>125)%Int +1)>>1) # nearest quadrant + f = (w<<2) % Int128 # fraction part of quotient after division by π/2, taking values on [-0.5,0.5) + + # 5. convert quotient fraction to split precision Float64 + z_hi,z_lo = fromfraction(f) + + # 6. multiply by π/2 + pio2 = 1.5707963267948966 + pio2_hi = 1.5707963407039642 + pio2_lo = -1.3909067614167116e-8 + y_hi = (z_hi+z_lo)*pio2 + y_lo = (((z_hi*pio2_hi - y_hi) + z_hi*pio2_lo) + z_lo*pio2_hi) + z_lo*pio2_lo + return q, DoubleFloat64(y_hi, y_lo) +end + +""" + rem_pio2_kernel(x, xhp) + +Return the remainder of `x` modulo π/2 as a double-double pair, along with a `k` +such that ``k \\mod 3 == K \\mod 3`` where ``K*π/2 = x - rem``. Note, that it is +only meant for use when ``|x|>=π/4``, and that ``π/2`` is always subtracted or +added for ``π/4<|x|<=π/2`` instead of simply returning `x`. +""" +@inline function rem_pio2_kernel(x::Float64) + xhp = poshighword(x) + # xhp <= highword(5pi/4) implies |x| ~<= 5pi/4, + if xhp <= 0x400f6a7a + # last five bits of xhp == last five bits of highword(pi/2) or + # highword(2pi/2) implies |x| ~= pi/2 or 2pi/2, + if (xhp & 0xfffff) == 0x921fb # use precise Cody Waite scheme + return cody_waite_ext_pio2(x, xhp) + end + # use Cody Waite with two constants + # xhp <= highword(3pi/4) implies |x| ~<= 3pi/4 + if xhp <= 0x4002d97c + if x > 0.0 + return cody_waite_2c_pio2(x, 1.0, 1) + else + return cody_waite_2c_pio2(x, -1.0, -1) + end + # 3pi/4 < |x| <= 5pi/4 + else + if x > 0.0 + return cody_waite_2c_pio2(x, 2.0, 2) + else + return cody_waite_2c_pio2(x, -2.0, -2) + end + end + end + # xhp <= highword(9pi/4) implies |x| ~<= 9pi/4 + if xhp <= 0x401c463b + # xhp <= highword(7pi/4) implies |x| ~<= 7pi/4 + if xhp <= 0x4015fdbc + # xhp == highword(3pi/2) implies |x| ~= 3pi/2 + if xhp == 0x4012d97c # use precise Cody Waite scheme + return cody_waite_ext_pio2(x, xhp) + end + # use Cody Waite with two constants + if x > 0.0 + return cody_waite_2c_pio2(x, 3.0, 3) + else + return cody_waite_2c_pio2(x, -3.0, -3) + end + # 7pi/4 < |x| =< 9pi/4 + else + # xhp == highword(4pi/2) implies |x| ~= 4pi/2 + if xhp == 0x401921fb # use precise Cody Waite scheme + return cody_waite_ext_pio2(x, xhp) + end + # use Cody Waite with two constants + if x > 0.0 + return cody_waite_2c_pio2(x, 4.0, 4) + else + return cody_waite_2c_pio2(x, -4.0, -4) + end + end + end + # xhp < highword(2.0^20*pi/2) implies |x| ~< 2^20*pi/2 + if xhp < 0x413921fb # use precise Cody Waite scheme + return cody_waite_ext_pio2(x, xhp) + end + # if |x| >= 2^20*pi/2 switch to Payne Hanek + return paynehanek(x) +end diff --git a/contrib/add_license_to_files.jl b/contrib/add_license_to_files.jl index 5a523ba6edb91..57ce6788e0930 100644 --- a/contrib/add_license_to_files.jl +++ b/contrib/add_license_to_files.jl @@ -35,6 +35,7 @@ const skipfiles = [ # see: https://github.com/JuliaLang/julia/pull/11073#issuecomment-98099389 "../base/special/trig.jl", "../base/special/exp.jl", + "../base/special/rem_pio2.jl", "../base/linalg/givens.jl", # "../src/abi_llvm.cpp", diff --git a/test/mod2pi.jl b/test/mod2pi.jl index 623c08376bcd9..8ecdafe3a5e11 100644 --- a/test/mod2pi.jl +++ b/test/mod2pi.jl @@ -194,3 +194,75 @@ testModPi() @test mod2pi(355.0f0) ≈ 3.1416228f0 @test mod2pi(Int64(2)^60) == mod2pi(2.0^60) @test_throws ArgumentError mod2pi(Int64(2)^60-1) + +@testset "rem_pio2_kernel" begin + # test worst case + x = 6381956970095103.0 * 2.0^797 + a = setprecision(BigFloat, 4096) do + rem(big(x), big(pi)/2, RoundNearest) + end + + n, yrem = Base.Math.rem_pio2_kernel(x) + y=yrem.hi+yrem.lo + @test a-y= 2.0^20π/2, idx < 0 + 2.0^80*pi/4] # |x| >= 2.0^20π/2, idx > 0-0.22370138542135648 + + # ieee754_rem_pio2_return contains the returned value from the ieee754_rem_pio2 + # function in openlibm: https://github.com/JuliaLang/openlibm/blob/0598080ca09468490a13ae393ba17d8620c1b201/src/e_rem_pio2.c + ieee754_rem_pio2_return = [1.5707963267948966 1.5707963267948966; + 1.0471975511965979 -1.0471975511965979; + 0.10000000000000014 -0.10000000000000014; + 6.123233995736766e-17 -6.123233995736766e-17; + -0.6853981633974481 0.6853981633974481; + 0.10000000000000021 -0.10000000000000021; + 1.2246467991473532e-16 -1.2246467991473532e-16; + -0.6853981633974481 0.6853981633974481; + 0.09999999999999983 -0.09999999999999983; + 1.8369701987210297e-16 -1.8369701987210297e-16; + -0.6853981633974484 0.6853981633974484; + 2.4492935982947064e-16 -2.4492935982947064e-16; + 0.0999999999999999 -0.0999999999999999; + -0.6853981633974484 0.6853981633974484; + 3.135095805817224e-14 -3.135095805817224e-14; + 3.287386219680602e-8 -3.287386219680602e-8; + -0.1757159771004682 0.1757159771004682]' + + for (i, case) in enumerate(cases) + # negative argument + n, ret = Base.Math.rem_pio2_kernel(-case) + ret_sum = ret.hi+ret.lo + ulp_error = (ret_sum-ieee754_rem_pio2_return[1, i])/eps(ieee754_rem_pio2_return[1, i]) + @test ulp_error <= 0.5 + diff = Float64(mod(big(-case), big(pi)/2))-(ret.hi+ret.lo) + @test abs(diff) in (0.0, 1.5707963267948966, 1.5707963267948968) + # positive argument + n, ret = Base.Math.rem_pio2_kernel(case) + ret_sum = ret.hi+ret.lo + ulp_error = (ret_sum-ieee754_rem_pio2_return[2, i])/eps(ieee754_rem_pio2_return[2, i]) + @test ulp_error <= 0.5 + diff = Float64(mod(big(case), big(pi)/2))-(ret.hi+ret.lo) + @test abs(diff) in (0.0, 1.5707963267948966, 1.5707963267948968) + end +end +@testset "rem_pio2_kernel and mod2pi" begin + for int in (3632982096228748, 1135326194816) + bignum = int*big(pi)/2+0.00001 + bigrem = rem(bignum, big(pi)/2, RoundDown) + fnum = Float64(bignum) + n, ret = Base.Math.rem_pio2_kernel(fnum) + @test mod2pi(fnum) == (ret.hi+ret.lo) + end +end