Skip to content

[SYCL] Add element size argument to piKernelSetArg #5104

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions sycl/include/CL/sycl/detail/cg_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,15 @@ namespace detail {
class ArgDesc {
public:
ArgDesc(cl::sycl::detail::kernel_param_kind_t Type, void *Ptr, int Size,
int Index)
: MType(Type), MPtr(Ptr), MSize(Size), MIndex(Index) {}
int Index, int ElemSize = 0)
: MType(Type), MPtr(Ptr), MSize(Size), MIndex(Index),
MElemSize(ElemSize) {}

cl::sycl::detail::kernel_param_kind_t MType;
void *MPtr;
int MSize;
int MIndex;
int MElemSize;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The patch should break ABI since this structure crosses library boundaries. Breaking ABI is not allowed right now.

};

// The structure represents NDRange - global, local sizes, global offset and
Expand Down
3 changes: 2 additions & 1 deletion sycl/include/CL/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -1262,7 +1262,8 @@ __SYCL_EXPORT pi_result piKernelCreate(pi_program program,
pi_kernel *ret_kernel);

__SYCL_EXPORT pi_result piKernelSetArg(pi_kernel kernel, pi_uint32 arg_index,
size_t arg_size, const void *arg_value);
size_t arg_size, const void *arg_value,
size_t arg_align = 0);

__SYCL_EXPORT pi_result piKernelGetInfo(pi_kernel kernel,
pi_kernel_info param_name,
Expand Down
5 changes: 3 additions & 2 deletions sycl/plugins/cuda/pi_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2453,15 +2453,16 @@ pi_result cuda_piKernelCreate(pi_program program, const char *kernel_name,
}

pi_result cuda_piKernelSetArg(pi_kernel kernel, pi_uint32 arg_index,
size_t arg_size, const void *arg_value) {
size_t arg_size, const void *arg_value,
size_t arg_align) {

assert(kernel != nullptr);
pi_result retErr = PI_SUCCESS;
try {
if (arg_value) {
kernel->set_kernel_arg(arg_index, arg_size, arg_value);
} else {
kernel->set_kernel_local_arg(arg_index, arg_size);
kernel->set_kernel_local_arg(arg_index, arg_size, arg_align);
}
} catch (pi_result err) {
retErr = err;
Expand Down
12 changes: 8 additions & 4 deletions sycl/plugins/cuda/pi_cuda.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -636,9 +636,13 @@ struct _pi_kernel {
offsetPerIndex_[index] = localSize;
}

void add_local_arg(size_t index, size_t size) {
void add_local_arg(size_t index, size_t size, size_t align) {
size_t localOffset = this->get_local_size();
add_arg(index, sizeof(size_t), (const void *)&(localOffset), size);
if (localOffset % align != 0) {
localOffset = localOffset + align - (localOffset % align);
}
add_arg(index, sizeof(size_t), (const void *)&(localOffset),
size + (localOffset - this->get_local_size()));
}

void set_implicit_offset(size_t size, std::uint32_t *implicitOffset) {
Expand Down Expand Up @@ -719,8 +723,8 @@ struct _pi_kernel {
args_.add_arg(index, size, arg);
}

void set_kernel_local_arg(int index, size_t size) {
args_.add_local_arg(index, size);
void set_kernel_local_arg(int index, size_t size, size_t arg_align) {
args_.add_local_arg(index, size, arg_align);
}

void set_implicit_offset_arg(size_t size, std::uint32_t *implicitOffset) {
Expand Down
4 changes: 3 additions & 1 deletion sycl/plugins/hip/pi_hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2408,7 +2408,9 @@ pi_result hip_piKernelCreate(pi_program program, const char *kernel_name,
}

pi_result hip_piKernelSetArg(pi_kernel kernel, pi_uint32 arg_index,
size_t arg_size, const void *arg_value) {
size_t arg_size, const void *arg_value,
size_t arg_align) {
(void)arg_align;

assert(kernel != nullptr);
pi_result retErr = PI_SUCCESS;
Expand Down
4 changes: 2 additions & 2 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4250,8 +4250,8 @@ pi_result piKernelCreate(pi_program Program, const char *KernelName,
}

pi_result piKernelSetArg(pi_kernel Kernel, pi_uint32 ArgIndex, size_t ArgSize,
const void *ArgValue) {

const void *ArgValue, size_t ArgAlign) {
(void)ArgAlign;
// OpenCL: "the arg_value pointer can be NULL or point to a NULL value
// in which case a NULL value will be used as the value for the argument
// declared as a pointer to global or constant memory in the kernel"
Expand Down
10 changes: 9 additions & 1 deletion sycl/plugins/opencl/pi_opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,14 @@ pi_result piextKernelSetArgSampler(pi_kernel kernel, pi_uint32 arg_index,
sizeof(cl_sampler), cast<const cl_sampler *>(arg_value)));
}

pi_result piextKernelSetArg(pi_kernel kernel, pi_uint32 arg_index,
size_t arg_size, const void *arg_value,
size_t arg_align) {
(void)arg_align;
return cast<pi_result>(clSetKernelArg(
cast<cl_kernel>(kernel), cast<cl_uint>(arg_index), arg_size, arg_value));
}

pi_result piextKernelCreateWithNativeHandle(pi_native_handle nativeHandle,
pi_context, pi_program, bool,
pi_kernel *piKernel) {
Expand Down Expand Up @@ -1409,7 +1417,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
_PI_CL(piextProgramCreateWithNativeHandle, piextProgramCreateWithNativeHandle)
// Kernel
_PI_CL(piKernelCreate, piKernelCreate)
_PI_CL(piKernelSetArg, clSetKernelArg)
_PI_CL(piKernelSetArg, piextKernelSetArg)
_PI_CL(piKernelGetInfo, clGetKernelInfo)
_PI_CL(piKernelGetGroupInfo, piKernelGetGroupInfo)
_PI_CL(piKernelGetSubGroupInfo, piKernelGetSubGroupInfo)
Expand Down
6 changes: 3 additions & 3 deletions sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1762,8 +1762,8 @@ static pi_result SetKernelParamsAndLaunch(
"which accessors are used");
RT::PiMem MemArg = (RT::PiMem)getMemAllocationFunc(Req);
if (Plugin.getBackend() == backend::opencl) {
Plugin.call<PiApiKind::piKernelSetArg>(Kernel, NextTrueIndex,
sizeof(RT::PiMem), &MemArg);
Plugin.call<PiApiKind::piKernelSetArg>(
Kernel, NextTrueIndex, sizeof(RT::PiMem), &MemArg, Arg.MElemSize);
} else {
Plugin.call<PiApiKind::piextKernelSetArgMemObj>(Kernel, NextTrueIndex,
&MemArg);
Expand All @@ -1772,7 +1772,7 @@ static pi_result SetKernelParamsAndLaunch(
}
case kernel_param_kind_t::kind_std_layout: {
Plugin.call<PiApiKind::piKernelSetArg>(Kernel, NextTrueIndex, Arg.MSize,
Arg.MPtr);
Arg.MPtr, Arg.MElemSize);
break;
}
case kernel_param_kind_t::kind_sampler: {
Expand Down
2 changes: 1 addition & 1 deletion sycl/source/handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind,
for (int I = 0; I < Dims; ++I)
SizeInBytes *= Size[I];
MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, nullptr,
SizeInBytes, Index + IndexShift);
SizeInBytes, Index + IndexShift, LAcc->MElemSize);
if (!IsKernelCreatedFromSource) {
++IndexShift;
const size_t SizeAccField = Dims * sizeof(Size[0]);
Expand Down
3 changes: 2 additions & 1 deletion sycl/unittests/assert/assert.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,8 @@ redefinedMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size,
static pi_result redefinedMemRelease(pi_mem mem) { return PI_SUCCESS; }

static pi_result redefinedKernelSetArg(pi_kernel kernel, pi_uint32 arg_index,
size_t arg_size, const void *arg_value) {
size_t arg_size, const void *arg_value,
size_t arg_align) {
return PI_SUCCESS;
}

Expand Down
14 changes: 7 additions & 7 deletions sycl/unittests/pi/cuda/test_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ TEST_F(CudaKernelsTest, PIKernelArgumentSimple) {

int number = 10;
ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piKernelSetArg>(
kern, 0, sizeof(int), &number)),
kern, 0, sizeof(int), &number, 0)),
PI_SUCCESS);
const auto &kernArgs = kern->get_arg_indices();
ASSERT_EQ(kernArgs.size(), (size_t)1 + NUM_IMPLICIT_ARGS);
Expand Down Expand Up @@ -266,7 +266,7 @@ TEST_F(CudaKernelsTest, PIKernelArgumentSetTwice) {

int number = 10;
ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piKernelSetArg>(
kern, 0, sizeof(int), &number)),
kern, 0, sizeof(int), &number, 0)),
PI_SUCCESS);
const auto &kernArgs = kern->get_arg_indices();
ASSERT_GT(kernArgs.size(), (size_t)0 + NUM_IMPLICIT_ARGS);
Expand All @@ -275,7 +275,7 @@ TEST_F(CudaKernelsTest, PIKernelArgumentSetTwice) {

int otherNumber = 934;
ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piKernelSetArg>(
kern, 0, sizeof(int), &otherNumber)),
kern, 0, sizeof(int), &otherNumber, 0)),
PI_SUCCESS);
const auto &kernArgs2 = kern->get_arg_indices();
ASSERT_EQ(kernArgs2.size(), (size_t)1 + NUM_IMPLICIT_ARGS);
Expand Down Expand Up @@ -311,7 +311,7 @@ TEST_F(CudaKernelsTest, PIKernelSetMemObj) {
PI_SUCCESS);

ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piKernelSetArg>(
kern, 0, sizeof(pi_mem), &memObj)),
kern, 0, sizeof(pi_mem), &memObj, 0)),
PI_SUCCESS);
const auto &kernArgs = kern->get_arg_indices();
ASSERT_EQ(kernArgs.size(), (size_t)1 + NUM_IMPLICIT_ARGS);
Expand Down Expand Up @@ -441,23 +441,23 @@ TEST_F(CudaKernelsTest, PIKernelArgumentSetTwiceOneLocal) {

int number = 10;
ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piKernelSetArg>(
kern, 0, sizeof(int), &number)),
kern, 0, sizeof(int), &number, 0)),
PI_SUCCESS);
const auto &kernArgs = kern->get_arg_indices();
ASSERT_GT(kernArgs.size(), (size_t)0 + NUM_IMPLICIT_ARGS);
int storedValue = *(static_cast<const int *>(kernArgs[0]));
ASSERT_EQ(storedValue, number);

ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piKernelSetArg>(
kern, 1, sizeof(int), nullptr)),
kern, 1, sizeof(int), nullptr, 0)),
PI_SUCCESS);
const auto &kernArgs2 = kern->get_arg_indices();
ASSERT_EQ(kernArgs2.size(), (size_t)2 + NUM_IMPLICIT_ARGS);
storedValue = *(static_cast<const int *>(kernArgs2[1]));
ASSERT_EQ(storedValue, 0);

ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piKernelSetArg>(
kern, 2, sizeof(int), nullptr)),
kern, 2, sizeof(int), nullptr, 0)),
PI_SUCCESS);
const auto &kernArgs3 = kern->get_arg_indices();
ASSERT_EQ(kernArgs3.size(), (size_t)3 + NUM_IMPLICIT_ARGS);
Expand Down