diff --git a/LICENSE.md b/LICENSE.md
index 8904e41b74eb0..b906f655a5817 100644
--- a/LICENSE.md
+++ b/LICENSE.md
@@ -73,7 +73,8 @@ The following components of Julia's standard library have separate licenses:
 - base/grisu/* (see [double-conversion](https://github.com/google/double-conversion/blob/master/LICENSE))
 - base/sparse/umfpack.jl (see [SUITESPARSE](http://faculty.cse.tamu.edu/davis/suitesparse.html))
 - base/sparse/cholmod.jl (see [SUITESPARSE](http://faculty.cse.tamu.edu/davis/suitesparse.html))
-- base/special/exp.jl (see [FREEBSD MSUN](https://github.com/freebsd/freebsd) [FreeBSD/2-clause BSD/Simplified BSD License])
+- base/special/exp.jl (see [FDLIBM](http://www.netlib.org/fdlibm/e_exp.c) [Freely distributable with preserved copyright notice])
+- base/special/rem_pio2.jl (see [FDLIBM](http://www.netlib.org/fdlibm/e_rem_pio2.c) [Freely distributable with preserved copyright notice])
 
 
 Julia's build process uses the following external tools:
diff --git a/base/math.jl b/base/math.jl
index 88bbf841d8da5..64cd5137ae155 100644
--- a/base/math.jl
+++ b/base/math.jl
@@ -781,25 +781,6 @@ function add22condh(xh::Float64, xl::Float64, yh::Float64, yl::Float64)
     return zh
 end
 
-@inline function ieee754_rem_pio2(x::Float64)
-    # rem_pio2 essentially computes x mod pi/2 (ie within a quarter circle)
-    # and returns the result as
-    # y between + and - pi/4 (for maximal accuracy (as the sign bit is exploited)), and
-    # n, where n specifies the integer part of the division, or, at any rate,
-    # in which quadrant we are.
-    # The invariant fulfilled by the returned values seems to be
-    #  x = y + n*pi/2 (where y = y1+y2 is a double-double and y2 is the "tail" of y).
-    # Note: for very large x (thus n), the invariant might hold only modulo 2pi
-    # (in other words, n might be off by a multiple of 4, or a multiple of 100)
-
-    # this is just wrapping up
-    # https://github.com/JuliaLang/openspecfun/blob/master/rem_pio2/e_rem_pio2.c
-
-    y = Ref{NTuple{2,Float64}}()
-    n = ccall((:__ieee754_rem_pio2, openspecfun), Cint, (Float64, Ptr{Void}), x, y)
-    return (n, y[])
-end
-
 # multiples of pi/2, as double-double (ie with "tail")
 const pi1o2_h  = 1.5707963267948966     # convert(Float64, pi * BigFloat(1/2))
 const pi1o2_l  = 6.123233995736766e-17  # convert(Float64, pi * BigFloat(1/2) - pi1o2_h)
@@ -847,23 +828,23 @@ function rem2pi end
 function rem2pi(x::Float64, ::RoundingMode{:Nearest})
     abs(x) < pi && return x
 
-    (n,y) = ieee754_rem_pio2(x)
+    n,y = rem_pio2_kernel(x)
 
     if iseven(n)
         if n & 2 == 2 # n % 4 == 2: add/subtract pi
-            if y[1] <= 0
-                return add22condh(y[1],y[2],pi2o2_h,pi2o2_l)
+            if y.hi <= 0
+                return add22condh(y.hi,y.lo,pi2o2_h,pi2o2_l)
             else
-                return add22condh(y[1],y[2],-pi2o2_h,-pi2o2_l)
+                return add22condh(y.hi,y.lo,-pi2o2_h,-pi2o2_l)
             end
         else          # n % 4 == 0: add 0
-            return y[1]
+            return y.hi+y.lo
         end
     else
         if n & 2 == 2 # n % 4 == 3: subtract pi/2
-            return add22condh(y[1],y[2],-pi1o2_h,-pi1o2_l)
+            return add22condh(y.hi,y.lo,-pi1o2_h,-pi1o2_l)
         else          # n % 4 == 1: add pi/2
-            return add22condh(y[1],y[2],pi1o2_h,pi1o2_l)
+            return add22condh(y.hi,y.lo,pi1o2_h,pi1o2_l)
         end
     end
 end
@@ -871,23 +852,23 @@ function rem2pi(x::Float64, ::RoundingMode{:ToZero})
     ax = abs(x)
     ax <= 2*Float64(pi,RoundDown) && return x
 
-    (n,y) = ieee754_rem_pio2(ax)
+    n,y = rem_pio2_kernel(x)
 
     if iseven(n)
         if n & 2 == 2 # n % 4 == 2: add pi
-            z = add22condh(y[1],y[2],pi2o2_h,pi2o2_l)
+            z = add22condh(y.hi,y.lo,pi2o2_h,pi2o2_l)
         else          # n % 4 == 0: add 0 or 2pi
-            if y[1] > 0
-                z = y[1]
+            if y.hi > 0
+                z = y.hi+y.lo
             else      # negative: add 2pi
-                z = add22condh(y[1],y[2],pi4o2_h,pi4o2_l)
+                z = add22condh(y.hi,y.lo,pi4o2_h,pi4o2_l)
             end
         end
     else
         if n & 2 == 2 # n % 4 == 3: add 3pi/2
-            z = add22condh(y[1],y[2],pi3o2_h,pi3o2_l)
+            z = add22condh(y.hi,y.lo,pi3o2_h,pi3o2_l)
         else          # n % 4 == 1: add pi/2
-            z = add22condh(y[1],y[2],pi1o2_h,pi1o2_l)
+            z = add22condh(y.hi,y.lo,pi1o2_h,pi1o2_l)
         end
     end
     copysign(z,x)
@@ -901,23 +882,23 @@ function rem2pi(x::Float64, ::RoundingMode{:Down})
         end
     end
 
-    (n,y) = ieee754_rem_pio2(x)
+    n,y = rem_pio2_kernel(x)
 
     if iseven(n)
         if n & 2 == 2 # n % 4 == 2: add pi
-            return add22condh(y[1],y[2],pi2o2_h,pi2o2_l)
+            return add22condh(y.hi,y.lo,pi2o2_h,pi2o2_l)
         else          # n % 4 == 0: add 0 or 2pi
-            if y[1] > 0
-                return y[1]
+            if y.hi > 0
+                return y.hi+y.lo
             else      # negative: add 2pi
-                return add22condh(y[1],y[2],pi4o2_h,pi4o2_l)
+                return add22condh(y.hi,y.lo,pi4o2_h,pi4o2_l)
             end
         end
     else
         if n & 2 == 2 # n % 4 == 3: add 3pi/2
-            return add22condh(y[1],y[2],pi3o2_h,pi3o2_l)
+            return add22condh(y.hi,y.lo,pi3o2_h,pi3o2_l)
         else          # n % 4 == 1: add pi/2
-            return add22condh(y[1],y[2],pi1o2_h,pi1o2_l)
+            return add22condh(y.hi,y.lo,pi1o2_h,pi1o2_l)
         end
     end
 end
@@ -930,23 +911,23 @@ function rem2pi(x::Float64, ::RoundingMode{:Up})
         end
     end
 
-    (n,y) = ieee754_rem_pio2(x)
+    n,y = rem_pio2_kernel(x)
 
     if iseven(n)
         if n & 2 == 2 # n % 4 == 2: sub pi
-            return add22condh(y[1],y[2],-pi2o2_h,-pi2o2_l)
+            return add22condh(y.hi,y.lo,-pi2o2_h,-pi2o2_l)
         else          # n % 4 == 0: sub 0 or 2pi
-            if y[1] < 0
-                return y[1]
+            if y.hi < 0
+                return y.hi+y.lo
             else      # positive: sub 2pi
-                return add22condh(y[1],y[2],-pi4o2_h,-pi4o2_l)
+                return add22condh(y.hi,y.lo,-pi4o2_h,-pi4o2_l)
             end
         end
     else
         if n & 2 == 2 # n % 4 == 3: sub pi/2
-            return add22condh(y[1],y[2],-pi1o2_h,-pi1o2_l)
+            return add22condh(y.hi,y.lo,-pi1o2_h,-pi1o2_l)
         else          # n % 4 == 1: sub 3pi/2
-            return add22condh(y[1],y[2],-pi3o2_h,-pi3o2_l)
+            return add22condh(y.hi,y.lo,-pi3o2_h,-pi3o2_l)
         end
     end
 end
@@ -1023,6 +1004,7 @@ include(joinpath("special", "exp.jl"))
 include(joinpath("special", "exp10.jl"))
 include(joinpath("special", "trig.jl"))
 include(joinpath("special", "gamma.jl"))
+include(joinpath("special", "rem_pio2.jl"))
 
 module JuliaLibm
 include(joinpath("special", "log.jl"))
diff --git a/base/special/exp.jl b/base/special/exp.jl
index 72da28644c2c4..e49bfb5ed942b 100644
--- a/base/special/exp.jl
+++ b/base/special/exp.jl
@@ -1,4 +1,4 @@
-# Based on FreeBSD lib/msun/src/e_exp.c
+# Based on FDLIBM http://www.netlib.org/fdlibm/e_exp.c
 # which is made available under the following licence
 
 ## Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved. Permission
diff --git a/base/special/rem_pio2.jl b/base/special/rem_pio2.jl
new file mode 100644
index 0000000000000..9d8f830c03f92
--- /dev/null
+++ b/base/special/rem_pio2.jl
@@ -0,0 +1,294 @@
+# This file is a part of Julia. Except for the rem_pio2_kernel, and
+# cody_waite_* methods (see below) license is MIT: https://julialang.org/license
+
+# rem_pio2_kernel and cody_waite_* methods are heavily based on FDLIBM code:
+# __ieee754_rem_pio2, that is made available under the following licence:
+
+## Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+##
+## Developed at SunPro, a Sun Microsystems, Inc. business.
+## Permission to use, copy, modify, and distribute this
+## software is freely granted, provided that this notice
+## is preserved.
+
+# Bits of 1/2π
+#   1/2π == sum(x / 0x1p64^i for i,x = enumerate(INV_2PI))
+# Can be obtained by:
+#
+#    setprecision(BigFloat, 4096)
+#    I = 0.5/big(pi)
+#    for i = 1:19
+#        I *= 0x1p64
+#        k = trunc(UInt64, I)
+#        @printf "0x%016x,\n" k
+#        I -= k
+#    end
+const INV_2PI = UInt64[
+    0x28be_60db_9391_054a,
+    0x7f09_d5f4_7d4d_3770,
+    0x36d8_a566_4f10_e410,
+    0x7f94_58ea_f7ae_f158,
+    0x6dc9_1b8e_9093_74b8,
+    0x0192_4bba_8274_6487,
+    0x3f87_7ac7_2c4a_69cf,
+    0xba20_8d7d_4bae_d121,
+    0x3a67_1c09_ad17_df90,
+    0x4e64_758e_60d4_ce7d,
+    0x2721_17e2_ef7e_4a0e,
+    0xc7fe_25ff_f781_6603,
+    0xfbcb_c462_d682_9b47,
+    0xdb4d_9fb3_c9f2_c26d,
+    0xd3d1_8fd9_a797_fa8b,
+    0x5d49_eeb1_faf9_7c5e,
+    0xcf41_ce7d_e294_a4ba,
+    0x9afe_d7ec_47e3_5742,
+    0x1580_cc11_bf1e_daea]
+
+"""
+    highword(x)
+
+Return the high word of `x` as a `UInt32`.
+"""
+@inline highword(x::UInt64) = unsafe_trunc(UInt32,x >> 32)
+@inline highword(x::Float64) = highword(reinterpret(UInt64, x))
+
+"""
+    poshighword(x)
+
+Return positive part of the high word of `x` as a `UInt32`.
+"""
+@inline poshighword(x::UInt64) = unsafe_trunc(UInt32,x >> 32)&0x7fffffff
+@inline poshighword(x::Float64) = poshighword(reinterpret(UInt64, x))
+
+@inline function cody_waite_2c_pio2(x::Float64, fn, n)
+    pio2_1 = 1.57079632673412561417e+00
+    pio2_1t = 6.07710050650619224932e-11
+
+    z = muladd(-fn, pio2_1, x) # x - fn*pio2_1
+    y1 = muladd(-fn, pio2_1t, z) # z - fn*pio2_1t
+    y2 = muladd(-fn, pio2_1t, (z - y1)) # (z - y1) - fn*pio2_1t
+    n, DoubleFloat64(y1, y2)
+end
+
+@inline function cody_waite_ext_pio2(x::Float64, xhp)
+    pio2_1 = 1.57079632673412561417e+00
+    pio2_1t = 6.07710050650619224932e-11
+    pio2_2 = 6.07710050630396597660e-11
+    pio2_2t = 2.02226624879595063154e-21
+    pio2_3 = 2.02226624871116645580e-21
+    pio2_3t = 8.47842766036889956997e-32
+
+    fn = round(x*(2/pi)) # round to integer
+    # on older systems, the above could be faster with
+    # rf = 1.5/eps(Float64)
+    # fn = (x*(2/pi)+rf)-rf
+
+    r  = muladd(-fn, pio2_1, x) # x - fn*pio2_1
+    w  = fn*pio2_1t # 1st round good to 85 bit
+    j  = xhp>>20
+    y1 = r-w
+    high = highword(y1)
+    i = j-((high>>20)&0x7ff)
+    if i>16  # 2nd iteration needed, good to 118
+        t  = r
+        w  = fn*pio2_2
+        r  = t-w
+        w  = muladd(fn, pio2_2t,-((t-r)-w))
+        y1 = r-w
+        high = highword(y1)
+        i = j-((high>>20)&0x7ff)
+        if i>49 # 3rd iteration need, 151 bits acc
+            t  = r # will cover all possible cases
+            w  = fn*pio2_3
+            r  = t-w
+            w  = muladd(fn, pio2_3t, -((t-r)-w))
+            y1 = r-w
+        end
+    end
+    y2 = (r-y1)-w
+    return unsafe_trunc(Int, fn), DoubleFloat64(y1, y2)
+end
+
+"""
+    fromfraction(f::Int128)
+
+Compute a tuple of values `(z1,z2)` such that
+    ``z1 + z2 == f / 2^128``
+and the significand of `z1` has 27 trailing zeros.
+"""
+function fromfraction(f::Int128)
+    if f == 0
+        return (0.0,0.0)
+    end
+
+    # 1. get leading term truncated to 26 bits
+    s = ((f < 0) % UInt64) << 63     # sign bit
+    x = abs(f) % UInt128             # magnitude
+    n1 = 128-leading_zeros(x)         # ndigits0z(x,2)
+    m1 = ((x >> (n1-26)) % UInt64) << 27
+    d1 = ((n1-128+1021) % UInt64) << 52
+    z1 = reinterpret(Float64, s | (d1 + m1))
+
+    # 2. compute remaining term
+    x2 = (x - (UInt128(m1) << (n1-53)))
+    if x2 == 0
+        return (z1, 0.0)
+    end
+    n2 = 128-leading_zeros(x2)
+    m2 = (x2 >> (n2-53)) % UInt64
+    d2 = ((n2-128+1021) % UInt64) << 52
+    z2 = reinterpret(Float64,  s | (d2 + m2))
+    return (z1,z2)
+end
+
+function paynehanek(x::Float64)
+    # 1. Convert to form
+    #
+    #    x = X * 2^k,
+    #
+    # where 2^(n-1) <= X < 2^n  is an n-bit integer (n = 53, k = exponent(x)-52 )
+
+    # Computations are integer based, so reinterpret x as UInt64
+    u = reinterpret(UInt64, x)
+    # Strip x of exponent bits and replace with ^1
+    X = (u & significand_mask(Float64)) | (one(UInt64) << significand_bits(Float64))
+    # Get k from formula above
+    # k = exponent(x)-52
+    k = Int((u & exponent_mask(Float64)) >> significand_bits(Float64)) - exponent_bias(Float64) - significand_bits(Float64)
+
+    # 2. Let α = 1/2π, then:
+    #
+    #    α*x mod 1 ≡ [(α*2^k mod 1)*X] mod 1
+    #
+    # so we can ignore the first k bits of α. Extract the next 3 64-bit parts of α.
+    #
+    # i.e. equivalent to
+    #     setprecision(BigFloat,4096)
+    #     α  = 1/(2*big(pi))
+    #     A  = mod(ldexp(α,k), 1)
+    #     z1 = ldexp(A,64)
+    #     a1 = trunc(UInt64, z1)
+    #     z2 = ldexp(z1-a1, 64)
+    #     a2 = trunc(UInt64, z2)
+    #     z3 = ldexp(z2-a2, 64)
+    #     a3 = trunc(UInt64, z3)
+
+    # This is equivalent to
+    #     idx, shift = divrem(k, 64)
+    # but divrem is slower.
+    idx = k >> 6
+
+    shift = k - (idx << 6)
+    if shift == 0
+        @inbounds a1 = INV_2PI[idx+1]
+        @inbounds a2 = INV_2PI[idx+2]
+        @inbounds a3 = INV_2PI[idx+3]
+    else
+        # use shifts to extract the relevant 64 bit window
+        @inbounds a1 = (idx < 0 ? zero(UInt64) : INV_2PI[idx+1] << shift) | (INV_2PI[idx+2] >> (64 - shift))
+        @inbounds a2 = (INV_2PI[idx+2] << shift) | (INV_2PI[idx+3] >> (64 - shift))
+        @inbounds a3 = (INV_2PI[idx+3] << shift) | (INV_2PI[idx+4] >> (64 - shift))
+    end
+
+    # 3. Perform the multiplication:
+    #
+    #      X.  0  0  0
+    #   ×  0. a1 a2 a3
+    #   ==============
+    #      _.  w  w  _
+    #
+    # (i.e. ignoring integer and lowest bit parts of result)
+
+    w1 = UInt128(X*a1) << 64 # overflow becomes integer
+    w2 = widemul(X,a2)
+    w3 = widemul(X,a3) >> 64
+    w = w1 + w2 + w3         # quotient fraction after division by 2π
+
+    # adjust for sign of x
+    w = flipsign(w,x)
+
+    # 4. convert to quadrant, quotient fraction after division by π/2:
+    q = (((w>>125)%Int +1)>>1) # nearest quadrant
+    f = (w<<2) % Int128 # fraction part of quotient after division by π/2, taking values on [-0.5,0.5)
+
+    # 5. convert quotient fraction to split precision Float64
+    z_hi,z_lo = fromfraction(f)
+
+    # 6. multiply by π/2
+    pio2 = 1.5707963267948966
+    pio2_hi = 1.5707963407039642
+    pio2_lo = -1.3909067614167116e-8
+    y_hi = (z_hi+z_lo)*pio2
+    y_lo = (((z_hi*pio2_hi - y_hi) + z_hi*pio2_lo) + z_lo*pio2_hi) + z_lo*pio2_lo
+    return q, DoubleFloat64(y_hi, y_lo)
+end
+
+"""
+    rem_pio2_kernel(x, xhp)
+
+Return the remainder of `x` modulo π/2 as a double-double pair, along with a `k`
+such that ``k \\mod 3 == K \\mod 3`` where ``K*π/2 = x - rem``. Note, that it is
+only meant for use when ``|x|>=π/4``, and that ``π/2`` is always subtracted or
+added for ``π/4<|x|<=π/2`` instead of simply returning `x`.
+"""
+@inline function rem_pio2_kernel(x::Float64)
+    xhp = poshighword(x)
+    #  xhp <= highword(5pi/4) implies |x| ~<= 5pi/4,
+    if xhp <= 0x400f6a7a
+        #  last five bits of xhp == last five bits of highword(pi/2) or
+        #  highword(2pi/2) implies |x| ~= pi/2 or 2pi/2,
+        if (xhp & 0xfffff) == 0x921fb # use precise Cody Waite scheme
+            return cody_waite_ext_pio2(x, xhp)
+        end
+        # use Cody Waite with two constants
+        #  xhp <= highword(3pi/4) implies |x| ~<= 3pi/4
+        if xhp <= 0x4002d97c
+            if x > 0.0
+                return cody_waite_2c_pio2(x, 1.0, 1)
+            else
+                return cody_waite_2c_pio2(x, -1.0, -1)
+            end
+        # 3pi/4 < |x| <= 5pi/4
+        else
+            if x > 0.0
+                return cody_waite_2c_pio2(x, 2.0, 2)
+            else
+                return cody_waite_2c_pio2(x, -2.0, -2)
+            end
+        end
+    end
+    #  xhp <= highword(9pi/4) implies |x| ~<= 9pi/4
+    if xhp <= 0x401c463b
+        #  xhp <= highword(7pi/4) implies |x| ~<= 7pi/4
+        if xhp <= 0x4015fdbc
+            #  xhp == highword(3pi/2) implies |x| ~= 3pi/2
+            if xhp == 0x4012d97c # use precise Cody Waite scheme
+                return cody_waite_ext_pio2(x, xhp)
+            end
+            # use Cody Waite with two constants
+            if x > 0.0
+                return cody_waite_2c_pio2(x, 3.0, 3)
+            else
+                return cody_waite_2c_pio2(x, -3.0, -3)
+            end
+        # 7pi/4 < |x| =< 9pi/4
+        else
+            #  xhp == highword(4pi/2) implies |x| ~= 4pi/2
+            if xhp == 0x401921fb # use precise Cody Waite scheme
+                return cody_waite_ext_pio2(x, xhp)
+            end
+            # use Cody Waite with two constants
+            if x > 0.0
+                return cody_waite_2c_pio2(x, 4.0, 4)
+            else
+                return cody_waite_2c_pio2(x, -4.0, -4)
+            end
+        end
+    end
+    #  xhp < highword(2.0^20*pi/2) implies |x| ~< 2^20*pi/2
+    if xhp < 0x413921fb # use precise Cody Waite scheme
+        return cody_waite_ext_pio2(x, xhp)
+    end
+    # if |x| >= 2^20*pi/2 switch to Payne Hanek
+    return paynehanek(x)
+end
diff --git a/contrib/add_license_to_files.jl b/contrib/add_license_to_files.jl
index 5a523ba6edb91..57ce6788e0930 100644
--- a/contrib/add_license_to_files.jl
+++ b/contrib/add_license_to_files.jl
@@ -35,6 +35,7 @@ const skipfiles = [
     # see: https://github.com/JuliaLang/julia/pull/11073#issuecomment-98099389
     "../base/special/trig.jl",
     "../base/special/exp.jl",
+    "../base/special/rem_pio2.jl",
     "../base/linalg/givens.jl",
     #
     "../src/abi_llvm.cpp",
diff --git a/test/mod2pi.jl b/test/mod2pi.jl
index 623c08376bcd9..8ecdafe3a5e11 100644
--- a/test/mod2pi.jl
+++ b/test/mod2pi.jl
@@ -194,3 +194,75 @@ testModPi()
 @test mod2pi(355.0f0) ≈ 3.1416228f0
 @test mod2pi(Int64(2)^60) == mod2pi(2.0^60)
 @test_throws ArgumentError mod2pi(Int64(2)^60-1)
+
+@testset "rem_pio2_kernel" begin
+    # test worst case
+    x = 6381956970095103.0 * 2.0^797
+    a = setprecision(BigFloat, 4096) do
+        rem(big(x), big(pi)/2, RoundNearest)
+    end
+
+    n, yrem = Base.Math.rem_pio2_kernel(x)
+    y=yrem.hi+yrem.lo
+    @test a-y<nextfloat(y)/2
+    # The following has easy and hard cases in each interval. A hard case is one
+    # where x ≈ k*pi/2 for some integer k.
+    cases = [0.0, pi/6, # -π/4 <= x <= π/4
+             2*pi/4-0.1, 2*pi/4, # -2π/4 <= x <= 2π/4
+             3*pi/4-0.1, # -3π/4 <= x <= 3π/4
+             pi-0.1, Float64(pi), # -4π/4 <= x <= 4π/4
+             5*pi/4-0.1, # -5π/4 <= x <= 5π/4
+             6*pi/4-0.1, 6*pi/4, # -6π/4 <= x <= 6π/4
+             7*pi/4-0.1, # -7π/4 <= x <= 7π/4
+             2*pi, 2*pi-0.1, # -8π/4 <= x <= 8π/4
+             9*pi/4-0.1, # -9π/4 <= x <= 9π/4
+             2.0^10*pi/4, # -2.0^20π/2 <= x <= 2.0^20π/2
+             2.0^30*pi/4, # |x| >= 2.0^20π/2, idx < 0
+             2.0^80*pi/4] # |x| >= 2.0^20π/2, idx > 0-0.22370138542135648
+
+     # ieee754_rem_pio2_return contains the returned value from the ieee754_rem_pio2
+     # function in openlibm: https://github.com/JuliaLang/openlibm/blob/0598080ca09468490a13ae393ba17d8620c1b201/src/e_rem_pio2.c
+     ieee754_rem_pio2_return = [1.5707963267948966 1.5707963267948966;
+                                1.0471975511965979 -1.0471975511965979;
+                                0.10000000000000014 -0.10000000000000014;
+                                6.123233995736766e-17 -6.123233995736766e-17;
+                                -0.6853981633974481 0.6853981633974481;
+                                0.10000000000000021 -0.10000000000000021;
+                                1.2246467991473532e-16 -1.2246467991473532e-16;
+                                -0.6853981633974481 0.6853981633974481;
+                                0.09999999999999983 -0.09999999999999983;
+                                1.8369701987210297e-16 -1.8369701987210297e-16;
+                                -0.6853981633974484 0.6853981633974484;
+                                2.4492935982947064e-16 -2.4492935982947064e-16;
+                                0.0999999999999999 -0.0999999999999999;
+                                -0.6853981633974484 0.6853981633974484;
+                                3.135095805817224e-14 -3.135095805817224e-14;
+                                3.287386219680602e-8 -3.287386219680602e-8;
+                                -0.1757159771004682 0.1757159771004682]'
+
+    for (i, case) in enumerate(cases)
+        # negative argument
+        n, ret = Base.Math.rem_pio2_kernel(-case)
+        ret_sum = ret.hi+ret.lo
+        ulp_error = (ret_sum-ieee754_rem_pio2_return[1, i])/eps(ieee754_rem_pio2_return[1, i])
+        @test ulp_error <= 0.5
+        diff = Float64(mod(big(-case), big(pi)/2))-(ret.hi+ret.lo)
+        @test abs(diff) in (0.0, 1.5707963267948966, 1.5707963267948968)
+        # positive argument
+        n, ret = Base.Math.rem_pio2_kernel(case)
+        ret_sum = ret.hi+ret.lo
+        ulp_error = (ret_sum-ieee754_rem_pio2_return[2, i])/eps(ieee754_rem_pio2_return[2, i])
+        @test ulp_error <= 0.5
+        diff = Float64(mod(big(case), big(pi)/2))-(ret.hi+ret.lo)
+        @test abs(diff) in (0.0, 1.5707963267948966, 1.5707963267948968)
+    end
+end
+@testset "rem_pio2_kernel and mod2pi" begin
+    for int in (3632982096228748, 1135326194816)
+        bignum = int*big(pi)/2+0.00001
+        bigrem = rem(bignum, big(pi)/2, RoundDown)
+        fnum = Float64(bignum)
+        n, ret = Base.Math.rem_pio2_kernel(fnum)
+        @test mod2pi(fnum) == (ret.hi+ret.lo)
+    end
+end