diff --git a/source/lib/src/cdsilu.cc b/source/lib/src/cdsilu.cc index e5f8a4d9c4..ed82d25db0 100644 --- a/source/lib/src/cdsilu.cc +++ b/source/lib/src/cdsilu.cc @@ -38,11 +38,13 @@ inline FPTYPE customdsilugrad(const FPTYPE x, const FPTYPE a, const FPTYPE b) { FPTYPE eax1 = std::exp(-xbar); FPTYPE eax1p1 = eax1 + (FPTYPE)1.0; FPTYPE eax1p1r = (FPTYPE)1.0 / eax1p1; + FPTYPE eax1eax1p1r = 1 - eax1p1r; FPTYPE eaxb1 = std::exp(-xbar + b); FPTYPE eaxb1p1 = eaxb1 + (FPTYPE)1.0; FPTYPE eaxb1p1r = (FPTYPE)1.0 / eaxb1p1; - return (-xbar * eax1 * eax1p1r * eax1p1r - eax1p1r) * eaxb1p1r + - ((FPTYPE)1.0 - xbar * eax1p1r) * eaxb1 * eaxb1p1r * eaxb1p1r + + FPTYPE eaxb1eaxb1p1r = 1 - eaxb1p1r; + return (-xbar * eax1eax1p1r * eax1p1r - eax1p1r) * eaxb1p1r + + ((FPTYPE)1.0 - xbar * eax1p1r) * eaxb1eaxb1p1r * eaxb1p1r + silugrad(x); } @@ -54,17 +56,18 @@ inline FPTYPE customdsilugradgrad(const FPTYPE x, FPTYPE eax1 = std::exp(-xbar); FPTYPE eax1p1 = eax1 + (FPTYPE)1.0; FPTYPE eax1p1r = (FPTYPE)1.0 / eax1p1; + FPTYPE eax1eax1p1r = 1 - eax1p1r; FPTYPE eaxb1 = std::exp(-xbar + b); FPTYPE eaxb1p1 = eaxb1 + (FPTYPE)1.0; FPTYPE eaxb1p1r = (FPTYPE)1.0 / eaxb1p1; - return ((FPTYPE)2.0 * (-xbar * eax1 * eax1p1r * eax1p1r - eax1p1r) * eaxb1 - - ((FPTYPE)1.0 - xbar * eax1p1r) * eaxb1) * - eaxb1p1r * eaxb1p1r + - (xbar * eax1 - (FPTYPE)2.0 * xbar * eax1 * eax1 * eax1p1r - - (FPTYPE)2.0 * eax1) * - eax1p1r * eax1p1r * eaxb1p1r + - (FPTYPE)2.0 * ((FPTYPE)1.0 - xbar * eax1p1r) * eaxb1 * eaxb1 * - eaxb1p1r * eaxb1p1r * eaxb1p1r + + FPTYPE eaxb1eaxb1p1r = 1 - eaxb1p1r; + return ((FPTYPE)2.0 * (-xbar * eax1eax1p1r * eax1p1r - eax1p1r) - + ((FPTYPE)1.0 - xbar * eax1p1r)) * + eaxb1eaxb1p1r * eaxb1p1r + + (xbar - (FPTYPE)2.0 * xbar * eax1eax1p1r - (FPTYPE)2.0) * eax1eax1p1r * + eax1p1r * eaxb1p1r + + (FPTYPE)2.0 * ((FPTYPE)1.0 - xbar * eax1p1r) * eaxb1eaxb1p1r * + eaxb1eaxb1p1r * eaxb1p1r + silugradgrad(x); } diff --git a/source/lib/src/gpu/cdsilu.cu b/source/lib/src/gpu/cdsilu.cu index 73e5eecb9f..b9697a3ca8 100644 --- a/source/lib/src/gpu/cdsilu.cu +++ b/source/lib/src/gpu/cdsilu.cu @@ -42,11 +42,13 @@ __device__ inline FPTYPE customdsilugrad(const FPTYPE x, FPTYPE eax1 = _exp(-xbar); FPTYPE eax1p1 = eax1 + (FPTYPE)1.0; FPTYPE eax1p1r = (FPTYPE)1.0 / eax1p1; + FPTYPE eax1eax1p1r = 1 - eax1p1r; FPTYPE eaxb1 = _exp(-xbar + b); FPTYPE eaxb1p1 = eaxb1 + (FPTYPE)1.0; FPTYPE eaxb1p1r = (FPTYPE)1.0 / eaxb1p1; - return (-xbar * eax1 * eax1p1r * eax1p1r - eax1p1r) * eaxb1p1r + - ((FPTYPE)1.0 - xbar * eax1p1r) * eaxb1 * eaxb1p1r * eaxb1p1r + + FPTYPE eaxb1eaxb1p1r = 1 - eaxb1p1r; + return (-xbar * eax1eax1p1r * eax1p1r - eax1p1r) * eaxb1p1r + + ((FPTYPE)1.0 - xbar * eax1p1r) * eaxb1eaxb1p1r * eaxb1p1r + silugrad(x); } @@ -58,17 +60,18 @@ __device__ inline FPTYPE customdsilugradgrad(const FPTYPE x, FPTYPE eax1 = _exp(-xbar); FPTYPE eax1p1 = eax1 + (FPTYPE)1.0; FPTYPE eax1p1r = (FPTYPE)1.0 / eax1p1; + FPTYPE eax1eax1p1r = 1 - eax1p1r; FPTYPE eaxb1 = _exp(-xbar + b); FPTYPE eaxb1p1 = eaxb1 + (FPTYPE)1.0; FPTYPE eaxb1p1r = (FPTYPE)1.0 / eaxb1p1; - return ((FPTYPE)2.0 * (-xbar * eax1 * eax1p1r * eax1p1r - eax1p1r) * eaxb1 - - ((FPTYPE)1.0 - xbar * eax1p1r) * eaxb1) * - eaxb1p1r * eaxb1p1r + - (xbar * eax1 - (FPTYPE)2.0 * xbar * eax1 * eax1 * eax1p1r - - (FPTYPE)2.0 * eax1) * - eax1p1r * eax1p1r * eaxb1p1r + - (FPTYPE)2.0 * ((FPTYPE)1.0 - xbar * eax1p1r) * eaxb1 * eaxb1 * - eaxb1p1r * eaxb1p1r * eaxb1p1r + + FPTYPE eaxb1eaxb1p1r = 1 - eaxb1p1r; + return ((FPTYPE)2.0 * (-xbar * eax1eax1p1r * eax1p1r - eax1p1r) - + ((FPTYPE)1.0 - xbar * eax1p1r)) * + eaxb1eaxb1p1r * eaxb1p1r + + (xbar - (FPTYPE)2.0 * xbar * eax1eax1p1r - (FPTYPE)2.0) * eax1eax1p1r * + eax1p1r * eaxb1p1r + + (FPTYPE)2.0 * ((FPTYPE)1.0 - xbar * eax1p1r) * eaxb1eaxb1p1r * + eaxb1eaxb1p1r * eaxb1p1r + silugradgrad(x); }