3 changes: 2 additions & 1 deletion paddle/phi/kernels/cpu/activation_grad_kernel.cc
@@ -299,7 +299,8 @@ PD_REGISTER_ACTIVATION_GRAD_KERNEL(thresholded_relu_grad,
PD_REGISTER_ACTIVATION_GRAD_KERNEL(relu6_grad, Relu6GradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(softshrink_grad, SoftShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_shrink_grad, HardShrinkGradKernel)
-PD_REGISTER_ACTIVATION_GRAD_KERNEL(tanh_shrink_grad, TanhShrinkGradKernel)
+PD_REGISTER_ACTIVATION_GRAD_KERNEL_WITH_COMPLEX(tanh_shrink_grad,
+TanhShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(elu_grad, EluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL_WITH_COMPLEX(silu_grad, SiluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(mish_grad, MishGradKernel)
2 changes: 1 addition & 1 deletion paddle/phi/kernels/cpu/activation_kernel.cc
@@ -193,7 +193,7 @@ PD_REGISTER_ACTIVATION_KERNEL(leaky_relu, LeakyReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(thresholded_relu, ThresholdedReluKernel)
PD_REGISTER_ACTIVATION_KERNEL(hard_shrink, HardShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(softshrink, SoftShrinkKernel)
-PD_REGISTER_ACTIVATION_KERNEL(tanh_shrink, TanhShrinkKernel)
+PD_REGISTER_ACTIVATION_KERNEL_WITH_COMPLEX(tanh_shrink, TanhShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(elu, EluKernel)
PD_REGISTER_ACTIVATION_KERNEL_WITH_COMPLEX(silu, SiluKernel)
PD_REGISTER_ACTIVATION_KERNEL(mish, MishKernel)
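The only change in these two CPU files is the registration macro: PD_REGISTER_ACTIVATION_KERNEL_WITH_COMPLEX and PD_REGISTER_ACTIVATION_GRAD_KERNEL_WITH_COMPLEX presumably register the same kernels for complex64 and complex128 in addition to the real floating-point dtypes covered by the plain macros (the macro definitions themselves are not part of this diff). A minimal sketch, not part of the PR, for sanity-checking the CPU forward path once these registrations are in place:

import numpy as np
import paddle
import paddle.nn.functional as F

# Force the CPU kernels registered in the two files above.
paddle.set_device('cpu')

x_np = (np.random.rand(4) + 1j * np.random.rand(4)).astype('complex64')
x = paddle.to_tensor(x_np)
y = F.tanhshrink(x)

# Forward reference: tanhshrink(x) = x - tanh(x), applied elementwise.
np.testing.assert_allclose(y.numpy(), x_np - np.tanh(x_np), rtol=1e-5)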
28 changes: 28 additions & 0 deletions paddle/phi/kernels/funcs/activation_functor.h
@@ -1916,6 +1916,21 @@ struct TanhShrinkGradFunctor : public BaseActivationFunctor<T> {
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

template <typename T>
struct TanhShrinkGradFunctor<ComplexType<T>>
: public BaseActivationFunctor<ComplexType<T>> {
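// dx = dout * conj(tanh(x)^2)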
template <typename Device,
typename X,
typename Out,
typename dOut,
typename dX>
void operator()(Device d, X x, Out out UNUSED, dOut dout, dX dx) const {
dx.device(d) = dout * (x.tanh() * x.tanh()).unaryExpr(Conj<T>());
}

static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

// tanhshrink(x) = x - tanh(x)
// where tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
template <typename T>
@@ -4388,6 +4403,19 @@ struct CudaTanhShrinkGradFunctor : public BaseActivationFunctor<T> {
static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

template <typename T>
struct CudaTanhShrinkGradFunctor<ComplexType<T>>
: public BaseActivationFunctor<ComplexType<T>> {
// dx = dout * conj(tanh(x)^2)
__device__ __forceinline__ ComplexType<T> operator()(
const ComplexType<T> arg_dout, const ComplexType<T> arg_x) const {
return static_cast<ComplexType<T>>(arg_dout *
conj(tanh(arg_x) * tanh(arg_x)));
}

static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
};

template <typename T>
struct CudaHardShrinkFunctor : public BaseActivationFunctor<T> {
T zero = static_cast<T>(0.0f);
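For reference, the formula used by both complex specializations follows directly from the definition tanhshrink(x) = x - tanh(x). Differentiating gives

f(x) = x - \tanh(x), \qquad f'(x) = 1 - \operatorname{sech}^2(x) = \tanh^2(x)

and, assuming Paddle's usual conjugate (Wirtinger-style) convention for complex gradients of elementwise holomorphic ops, the backward pass multiplies the incoming gradient by the conjugate of the derivative:

dx = \mathrm{dout} \cdot \overline{\tanh^2(x)}

which is exactly the dout * conj(tanh(x)^2) expression in the CPU and CUDA functors above.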
3 changes: 2 additions & 1 deletion paddle/phi/kernels/gpu/activation_grad_kernel.cu
@@ -406,7 +406,8 @@ PD_REGISTER_KERNEL(exp_grad,

PD_REGISTER_ACTIVATION_GRAD_KERNEL(softshrink_grad, SoftShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(hard_shrink_grad, HardShrinkGradKernel)
-PD_REGISTER_ACTIVATION_GRAD_KERNEL(tanh_shrink_grad, TanhShrinkGradKernel)
+PD_REGISTER_ACTIVATION_GRAD_KERNEL_WITH_COMPLEX(tanh_shrink_grad,
+TanhShrinkGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL_WITH_COMPLEX(silu_grad, SiluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(elu_grad, EluGradKernel)
PD_REGISTER_ACTIVATION_GRAD_KERNEL(elu_double_grad, EluDoubleGradKernel)
2 changes: 1 addition & 1 deletion paddle/phi/kernels/gpu/activation_kernel.cu
@@ -291,7 +291,7 @@ PD_REGISTER_KERNEL(square,

PD_REGISTER_ACTIVATION_KERNEL(hard_shrink, HardShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(softshrink, SoftShrinkKernel)
-PD_REGISTER_ACTIVATION_KERNEL(tanh_shrink, TanhShrinkKernel)
+PD_REGISTER_ACTIVATION_KERNEL_WITH_COMPLEX(tanh_shrink, TanhShrinkKernel)
PD_REGISTER_ACTIVATION_KERNEL(elu, EluKernel)
PD_REGISTER_ACTIVATION_KERNEL_WITH_COMPLEX(silu, SiluKernel)
PD_REGISTER_ACTIVATION_KERNEL_WITH_COMPLEX(softsign, SoftsignKernel)
14 changes: 12 additions & 2 deletions python/paddle/nn/functional/activation.py
@@ -1511,7 +1511,7 @@ def tanhshrink(x, name=None):
tanhshrink(x) = x - tanh(x)

Args:
-x (Tensor): The input Tensor with data type float32, float64.
+x (Tensor): The input Tensor with data type float32, float64, complex64, complex128.
name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.

Returns:
@@ -1533,7 +1533,17 @@ def tanhshrink(x, name=None):
return _C_ops.tanh_shrink(x)
else:
check_variable_and_dtype(
-x, 'x', ['float16', 'uint16', 'float32', 'float64'], 'tanhshrink'
+x,
+'x',
+[
+'float16',
+'uint16',
+'float32',
+'float64',
+'complex64',
+'complex128',
+],
+'tanhshrink',
)
helper = LayerHelper('tanh_shrink', **locals())
out = helper.create_variable_for_type_inference(x.dtype)
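With the dtype check extended, the Python API accepts complex tensors directly. A short usage sketch (dygraph mode; output values omitted since they depend on the input):

import paddle
import paddle.nn.functional as F

x = paddle.to_tensor([0.4 + 0.3j, -0.2 + 0.1j], dtype='complex64')
out = F.tanhshrink(x)  # elementwise x - tanh(x); complex dtype is preserved
print(out.dtype)       # paddle.complex64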
28 changes: 28 additions & 0 deletions test/legacy_test/test_activation_op.py
@@ -1254,6 +1254,11 @@ def setUp(self):

np.random.seed(1024)
x = np.random.uniform(10, 20, self.shape).astype(self.dtype)
if self.dtype == np.complex64 or self.dtype == np.complex128:
x = (
np.random.uniform(10, 20, self.shape)
+ 1j * np.random.uniform(10, 20, self.shape)
).astype(self.dtype)
out = ref_tanhshrink(x)
self.inputs = {'X': OpTest.np_dtype_to_base_dtype(x)}
self.outputs = {'Out': out}
@@ -1278,6 +1283,29 @@ def init_shape(self):
self.shape = []


class TestTanhshrinkComplex64(TestActivation):
def init_dtype(self):
self.dtype = np.complex64

def test_api_complex(self):
paddle.disable_static()
for device in devices:
if device == 'cpu' or (
device == 'gpu' and paddle.is_compiled_with_cuda()
):
np_x = np.array([[2, 3, 4], [7, 8, 9]], dtype=self.dtype)
x = paddle.to_tensor(np_x, dtype=self.dtype, place=device)
y = paddle.tanhshrink(x)
x_expect = ref_tanhshrink(np_x)
np.testing.assert_allclose(y.numpy(), x_expect, rtol=1e-3)
paddle.enable_static()


class TestTanhshrinkComplex128(TestTanhshrinkComplex64):
def init_dtype(self):
self.dtype = np.complex128


class TestTanhshrinkAPI(unittest.TestCase):
# test paddle.nn.Tanhshrink, paddle.nn.functional.tanhshrink
def setUp(self):
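ref_tanhshrink above is the NumPy reference helper already defined earlier in test_activation_op.py (it is what setUp compares against); since np.tanh handles complex input, the same elementwise definition presumably covers the new complex cases as well. A sketch of the assumed helper:

import numpy as np

def ref_tanhshrink(x):
    # tanhshrink(x) = x - tanh(x); np.tanh accepts real and complex arrays,
    # so this single expression serves as the reference for all tested dtypes.
    return x - np.tanh(x)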