[Zero-Dim] support input 0D Tensor for some XPU kernel #47849

Merged: 1 commit, merged on Nov 15, 2022
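Every kernel touched below applies the same workaround: XPU DNN calls reject an empty shape vector, so a 0-D tensor's shape [] is rewritten to [1] (or [1, 1] where two padded operands are involved) just before the call. Since a 0-D tensor holds exactly one element, this changes nothing numerically. A minimal sketch of the idea, using an illustrative helper name that does not appear in the PR:

```cpp
#include <vector>

// Illustrative helper (not part of this PR): map a rank-0 shape [] to [1]
// before handing it to an XPU DNN call, since those calls reject empty dims.
std::vector<int> NormalizeZeroDimShape(std::vector<int> dims) {
  if (dims.empty()) {
    dims = {1};  // a 0-D tensor holds exactly one element, so this is numerically a no-op
  }
  return dims;
}
```

The diffs below inline this check at each call site rather than sharing a helper.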
46 changes: 22 additions & 24 deletions paddle/phi/kernels/xpu/activation_grad_kernel.cc
@@ -169,39 +169,37 @@ struct XPULogGradFunctor : public funcs::BaseActivationFunctor<T> {
const DenseTensor* dOut,
DenseTensor* dX) const {
const T* x_data = nullptr;
const T* y_grad = nullptr;
const T* dout_data = nullptr;
if (x != nullptr) x_data = x->data<T>();
if (dOut != nullptr) y_grad = dOut->data<T>();
T* x_grad = dX->data<T>();
const auto x_dims = x->dims();
auto xshape = vectorize<int>(x_dims);
int len = x->dims()[x_dims.size() - 1];
std::vector<int> yshape(1, len);

xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
T* y_data = RAII_GUARD.alloc_l3_or_gm<T>(len);
PADDLE_ENFORCE_XDNN_NOT_NULL(y_data);
T* tmp_grad = RAII_GUARD.alloc_l3_or_gm<T>(x->numel());
PADDLE_ENFORCE_XDNN_NOT_NULL(tmp_grad);
int r =
xpu::constant<T>(dev_ctx.x_context(), y_data, len, static_cast<T>(1.0));
if (dOut != nullptr) dout_data = dOut->data<T>();

T* dx_data = dev_ctx.template Alloc<T>(dX);
int r = xpu::constant<T>(
dev_ctx.x_context(), dx_data, x->numel(), static_cast<T>(1.0));
PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant");

auto x_dims = vectorize<int>(x->dims());

// use [1] to replace [], because xpu not support []
if (x_dims.size() == 0) {
x_dims = std::vector<int>({1});
}

// dx.device(d) = dout * (static_cast<T>(1) / x);
r = xpu::broadcast_div(dev_ctx.x_context(),
reinterpret_cast<const float*>(y_data),
reinterpret_cast<const float*>(dx_data),
reinterpret_cast<const float*>(x_data),
reinterpret_cast<float*>(tmp_grad),
yshape,
xshape);
reinterpret_cast<float*>(dx_data),
x_dims,
x_dims);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_div");

r = xpu::broadcast_mul(dev_ctx.x_context(),
reinterpret_cast<const float*>(y_grad),
reinterpret_cast<const float*>(tmp_grad),
reinterpret_cast<float*>(x_grad),
xshape,
xshape);
reinterpret_cast<const float*>(dx_data),
reinterpret_cast<const float*>(dout_data),
reinterpret_cast<float*>(dx_data),
x_dims,
x_dims);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_mul");
}
};
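For reference, the rewritten functor computes dX = dOut * (1 / X) using the dX buffer as scratch: fill it with ones via xpu::constant, divide by X via broadcast_div, then multiply by dOut via broadcast_mul, with shapes normalized to [1] in the 0-D case. A minimal CPU sketch of that sequence (plain loops, not the xpu:: API):

```cpp
#include <cstdio>
#include <vector>

int main() {
  std::vector<float> x = {2.0f, 4.0f};
  std::vector<float> dout = {1.0f, 1.0f};
  std::vector<float> dx(x.size(), 1.0f);                       // xpu::constant: dx = 1
  for (size_t i = 0; i < x.size(); ++i) dx[i] /= x[i];         // xpu::broadcast_div: dx = 1 / x
  for (size_t i = 0; i < x.size(); ++i) dx[i] *= dout[i];      // xpu::broadcast_mul: dx = dout / x
  std::printf("%f %f\n", dx[0], dx[1]);                        // 0.500000 0.250000
  return 0;
}
```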
9 changes: 7 additions & 2 deletions paddle/phi/kernels/xpu/activation_kernel.cc
@@ -213,9 +213,14 @@ void PowKernel(const Context& dev_ctx,
static_cast<void*>(&pow_factor),
sizeof(T));

// broadcast_pow(Context* ctx, const T* x, const T* y, T* z, const
// std::vector<int>& xshape, const std::vector<int>& yshape);
auto x_dims = vectorize<int>(x.dims());
// use [1] to replace [], because xpu not support []
if (x_dims.size() == 0) {
x_dims = std::vector<int>({1});
}

// broadcast_pow(Context* ctx, const T* x, const T* y, T* z, const
// std::vector<int>& xshape, const std::vector<int>& yshape);
int r =
xpu::broadcast_pow(xpu_context, x_data, factor_data, y_data, x_dims, {1});
PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_pow");
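Here the exponent is passed as a tensor of shape {1}, so broadcast_pow raises every element of x to the same scalar factor; once a 0-D x has been mapped to [1], the same call also covers the scalar-to-scalar case. A rough CPU equivalent (plain loop, not the xpu:: API):

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  std::vector<float> x = {2.0f, 3.0f};
  float factor = 2.0f;                        // exponent tensor of shape {1}
  std::vector<float> out(x.size());
  for (size_t i = 0; i < x.size(); ++i) out[i] = std::pow(x[i], factor);  // broadcast_pow
  std::printf("%f %f\n", out[0], out[1]);     // 4.000000 9.000000
  return 0;
}
```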
20 changes: 20 additions & 0 deletions paddle/phi/kernels/xpu/elementwise.h
@@ -83,6 +83,17 @@ void XPUElementwise(const XPUContext& dev_ctx,

int ret = xpu::SUCCESS;

// For [2, 3] + [] --> [2, 3] + [1, 1]
// For [] + [2, 3] --> [1, 1] + [2, 3]
// For [] + [], Use [1] + [1] to replace [], because xpu not support []
if (x_dims_vec.size() == 0) {
x_dims_vec = std::vector<int>({1});
}

if (y_dims_vec.size() == 0) {
y_dims_vec = std::vector<int>({1});
}

ret = func(dev_ctx.x_context(),
reinterpret_cast<const XPUType*>(x_data),
reinterpret_cast<const XPUType*>(y_data),
@@ -163,6 +174,15 @@ void XPUElementwiseGrad(const XPUContext& dev_ctx,
dy_data = dev_ctx.template Alloc<T>(dy);
}

// use [1] to replace [], because xpu not support []
if (x_dims_vec.size() == 0) {
x_dims_vec = std::vector<int>({1});
}

if (y_dims_vec.size() == 0) {
y_dims_vec = std::vector<int>({1});
}

int ret = func(dev_ctx.x_context(),
reinterpret_cast<const XPUType*>(x_data),
reinterpret_cast<const XPUType*>(y_data),
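An elementwise op with one 0-D operand becomes an ordinary scalar broadcast after the []-to-[1, 1] mapping applied in XPUElementwise and XPUElementwiseGrad above. A small CPU illustration of the [2, 3] + [] case (assumed semantics, not Paddle code):

```cpp
#include <cstdio>

int main() {
  float x[2][3] = {{1, 2, 3}, {4, 5, 6}};
  float y = 10.0f;                            // 0-D operand, shape [] -> [1, 1]
  float out[2][3];
  for (int i = 0; i < 2; ++i)
    for (int j = 0; j < 3; ++j) out[i][j] = x[i][j] + y;  // scalar broadcast add
  std::printf("%f %f\n", out[0][0], out[1][2]);           // 11.000000 16.000000
  return 0;
}
```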
8 changes: 8 additions & 0 deletions paddle/phi/kernels/xpu/reduce_max_grad_kernel.cc
@@ -75,6 +75,14 @@ void ReduceMaxGradKernel(const Context& dev_ctx,
XPU_SUCCESS,
errors::ResourceExhausted("XPU has no enough memory"));

// use [1] to replace [], because xpu not support []
if (xdims.size() == 0) {
xdims = std::vector<int>({1});
}
if (ydims.size() == 0) {
ydims = std::vector<int>({1});
}

// step 1. broadcast out and out_grad
int r =
xpu::broadcast<T>(dev_ctx.x_context(), out_data, brocast1, ydims, xdims);
18 changes: 10 additions & 8 deletions paddle/phi/kernels/xpu/reduce_mean_grad_kernel.cc
@@ -38,14 +38,8 @@ void ReduceMeanGradKernel(const Context& dev_ctx,

auto reduce_dims = dims_arr.GetData();

std::vector<int> xdims;
for (int i = 0; i < x.dims().size(); i++) {
xdims.push_back(x.dims()[i]);
}
std::vector<int> ydims;
for (int i = 0; i < out_grad.dims().size(); i++) {
ydims.push_back(out_grad.dims()[i]);
}
std::vector<int> xdims = vectorize<int>(x.dims());
std::vector<int> ydims = vectorize<int>(out_grad.dims());

int reduce_numel = 1;
if (reduce_all) {
@@ -74,6 +68,14 @@ void ReduceMeanGradKernel(const Context& dev_ctx,
dev_ctx.x_context(), x_data, x.numel(), static_cast<XPUType>(val));
PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant");

// use [1] to replace [], because xpu not support []
if (xdims.size() == 0) {
xdims = std::vector<int>({1});
}
if (ydims.size() == 0) {
ydims = std::vector<int>({1});
}

r = xpu::broadcast_mul(
dev_ctx.x_context(), x_data, dy_data, x_data, xdims, ydims);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast_mul");
8 changes: 8 additions & 0 deletions paddle/phi/kernels/xpu/reduce_sum_grad_kernel.cc
@@ -57,6 +57,14 @@ void ReduceSumGradKernel(const Context& dev_ctx,
}
}

// use [1] to replace [], because xpu not support []
if (xdims.size() == 0) {
xdims = std::vector<int>({1});
}
if (ydims.size() == 0) {
ydims = std::vector<int>({1});
}

int r = xpu::broadcast<XPUType>(
dev_ctx.x_context(), out_data, x_grad_data, ydims, xdims);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "broadcast");
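After a full reduce_sum the out_grad is 0-D, so the backward pass copies its single value into every element of x_grad; with ydims mapped to [1], this is a plain [1]-to-[numel] broadcast. A rough CPU sketch (not the xpu::broadcast API):

```cpp
#include <cstdio>
#include <vector>

int main() {
  float out_grad = 3.0f;                 // 0-D gradient, shape [] -> treated as [1]
  std::vector<float> x_grad(6, 0.0f);    // x had shape [2, 3]
  for (float& g : x_grad) g = out_grad;  // broadcast the single value to every element
  std::printf("%f\n", x_grad[5]);        // 3.000000
  return 0;
}
```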
21 changes: 12 additions & 9 deletions paddle/phi/kernels/xpu/where_kernel.cc
@@ -31,15 +31,18 @@ void WhereKernel(const Context& ctx,
T* out_data = ctx.template Alloc<T>(out);

auto cond_dims = phi::vectorize<int>(condition.dims());
auto input_dims = phi::vectorize<int>(x.dims());

int ret = xpu::select(ctx.x_context(),
cond_data,
x_data,
y_data,
out_data,
cond_dims,
input_dims);
auto x_dims = phi::vectorize<int>(x.dims());

// use [1] to replace [], because xpu not support []
if (cond_dims.size() == 0) {
cond_dims = std::vector<int>({1});
}
if (x_dims.size() == 0) {
x_dims = std::vector<int>({1});
}

int ret = xpu::select(
ctx.x_context(), cond_data, x_data, y_data, out_data, cond_dims, x_dims);
PADDLE_ENFORCE_XDNN_SUCCESS(ret, "select");
}

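xpu::select implements where(): it takes x where the condition holds and y otherwise. With a 0-D condition and 0-D inputs, the []-to-[1] mapping reduces it to a single-element select, as sketched below (plain C++, assumed semantics):

```cpp
#include <cstdio>

int main() {
  bool cond = true;               // 0-D condition, shape [] -> [1]
  float x = 1.5f, y = -2.0f;      // 0-D inputs
  float out = cond ? x : y;       // per-element select
  std::printf("%f\n", out);       // 1.500000
  return 0;
}
```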
28 changes: 28 additions & 0 deletions python/paddle/fluid/tests/unittests/xpu/test_activation_op_xpu.py
@@ -75,6 +75,10 @@ def set_case(self):
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
self.outputs = {'Out': out}

class XPUTestExp_ZeroDIm(TestActivationOPBase):
def set_shape(self):
self.shape = []


support_types = get_xpu_op_support_types('exp')
for stype in support_types:
@@ -100,6 +104,10 @@ def set_case(self):
def init_config(self):
self.x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)

class XPUTestSigmoid_ZeroDIm(XPUTestSigmoid):
def init_config(self):
self.x = np.random.uniform(-2, 2, []).astype(self.dtype)

class XPUTestSigmoid2(XPUTestSigmoid):
def init_config(self):
self.x = np.random.uniform(-2, 2, [100]).astype(self.dtype)
@@ -310,6 +318,10 @@ def set_case(self):
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
self.outputs = {'Out': out}

class TestLogCase_ZeroDim(XPUTestLog):
def set_shape(self):
self.shape = []

class TestLogCase1(XPUTestLog):
def set_shape(self):
self.shape = [1, 11, 17]
@@ -351,6 +363,10 @@ def set_case(self):
def init_config(self):
self.x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)

class XPUTestSquare_ZeroDim(XPUTestSquare):
def init_config(self):
self.x = np.random.uniform(-2, 2, []).astype(self.dtype)

class XPUTestSquare2(XPUTestSquare):
def init_config(self):
self.x = np.random.uniform(-2, 2, [100]).astype(self.dtype)
@@ -517,6 +533,10 @@ def set_case(self):
def init_config(self):
self.x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)

class XPUTestSoftPlus_ZeroDim(XPUTestSoftPlusBase):
def init_config(self):
self.x = np.random.uniform(-2, 2, []).astype(self.dtype)

class XPUTestSoftPlus2(XPUTestSoftPlusBase):
def init_config(self):
self.x = np.random.uniform(-2, 2, [1024, 8]).astype(self.dtype)
@@ -976,6 +996,10 @@ def set_case(self):
def init_config(self):
self.x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)

class XPUTestSwish_ZeroDim(XPUTestSwishBase):
def init_config(self):
self.x = np.random.uniform(-2, 2, []).astype(self.dtype)

class XPUTestSwish2(XPUTestSwishBase):
def init_config(self):
self.x = np.random.uniform(-2, 2, [1024, 8]).astype(self.dtype)
@@ -1057,6 +1081,10 @@ def set_case(self):
def init_config(self):
self.x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)

class XPUTestMish_ZeroDim(XPUTestMishBase):
def init_config(self):
self.x = np.random.uniform(-2, 2, []).astype(self.dtype)

class XPUTestMish2(XPUTestMishBase):
def init_config(self):
self.x = np.random.uniform(-2, 2, [1024, 8]).astype(self.dtype)
@@ -101,6 +101,24 @@ def init_axis(self):
def init_max_relative_error(self):
self.max_relative_error = 0.006

class TestElementwiseAddOp_ZeroDim1(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.uniform(-1, 1, []).astype(self.dtype)
self.y = np.random.uniform(-1, 1, []).astype(self.dtype)
self.out = self.x + self.y

class TestElementwiseAddOp_ZeroDim2(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.uniform(-1, 1, []).astype(self.dtype)
self.y = np.random.uniform(-1, 1, [13, 17]).astype(self.dtype)
self.out = self.x + self.y

class TestElementwiseAddOp_ZeroDim3(TestElementwiseAddOp):
def init_input_output(self):
self.x = np.random.uniform(-1, 1, [13, 17]).astype(self.dtype)
self.y = np.random.uniform(-1, 1, []).astype(self.dtype)
self.out = self.x + self.y

@skip_check_grad_ci(
reason="[skip shape check] Use y_shape(1) to test broadcast."
)
@@ -93,6 +93,22 @@ def test_check_grad_ingore_y(self):
def init_dtype(self):
pass

class TestElementwiseDivOp_ZeroDim1(ElementwiseDivOp):
def init_input_output(self):
self.inputs = {
'X': np.random.uniform(-1, 1, []).astype(self.dtype),
'Y': np.random.uniform(-1, 1, []).astype(self.dtype),
}
self.outputs = {'Out': self.inputs['X'] / self.inputs['Y']}

class TestElementwiseDivOp_ZeroDim2(ElementwiseDivOp):
def init_input_output(self):
self.inputs = {
'X': np.random.uniform(-1, 1, [13, 17]).astype(self.dtype),
'Y': np.random.uniform(-1, 1, []).astype(self.dtype),
}
self.outputs = {'Out': self.inputs['X'] / self.inputs['Y']}

@skip_check_grad_ci(
reason="[skip shape check] Use y_shape(1) to test broadcast."
)
@@ -103,6 +103,30 @@ def init_dtype(self):
def init_axis(self):
pass

class TestElementwiseMulOp_ZeroDim1(ElementwiseMulOp):
def init_input_output(self):
self.inputs = {
'X': np.random.uniform(-1, 1, []).astype(self.dtype),
'Y': np.random.uniform(-1, 1, []).astype(self.dtype),
}
self.outputs = {'Out': self.inputs['X'] * self.inputs['Y']}

class TestElementwiseMulOp_ZeroDim2(ElementwiseMulOp):
def init_input_output(self):
self.inputs = {
'X': np.random.uniform(-1, 1, [13, 17]).astype(self.dtype),
'Y': np.random.uniform(-1, 1, []).astype(self.dtype),
}
self.outputs = {'Out': self.inputs['X'] * self.inputs['Y']}

class TestElementwiseMulOp_ZeroDim3(ElementwiseMulOp):
def init_input_output(self):
self.inputs = {
'X': np.random.uniform(-1, 1, []).astype(self.dtype),
'Y': np.random.uniform(-1, 1, [13, 17]).astype(self.dtype),
}
self.outputs = {'Out': self.inputs['X'] * self.inputs['Y']}

@skip_check_grad_ci(
reason="[skip shape check] Use y_shape(1) to test broadcast."
)
@@ -80,6 +80,30 @@ def test_check_grad_ingore_y(self):
no_grad_set=set('Y'),
)

class TestElementwiseSubOp_ZeroDim1(TestElementwiseOp):
def init_input_output(self):
self.inputs = {
'X': np.random.uniform(-1, 1, []).astype(self.dtype),
'Y': np.random.uniform(-1, 1, []).astype(self.dtype),
}
self.outputs = {'Out': self.inputs['X'] - self.inputs['Y']}

class TestElementwiseSubOp_ZeroDim2(TestElementwiseOp):
def init_input_output(self):
self.inputs = {
'X': np.random.uniform(-1, 1, [13, 17]).astype(self.dtype),
'Y': np.random.uniform(-1, 1, []).astype(self.dtype),
}
self.outputs = {'Out': self.inputs['X'] - self.inputs['Y']}

class TestElementwiseSubOp_ZeroDim3(TestElementwiseOp):
def init_input_output(self):
self.inputs = {
'X': np.random.uniform(-1, 1, []).astype(self.dtype),
'Y': np.random.uniform(-1, 1, [13, 17]).astype(self.dtype),
}
self.outputs = {'Out': self.inputs['X'] - self.inputs['Y']}

@skip_check_grad_ci(
reason="[skip shape check] Use y_shape(1) to test broadcast."
)