Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[OPENCL] Add UR handles to OPENCL adapter #1176

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 57 additions & 39 deletions source/adapters/opencl/command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@

#include "command_buffer.hpp"
#include "common.hpp"
#include "context.hpp"
#include "event.hpp"
#include "kernel.hpp"
#include "memory.hpp"
#include "queue.hpp"

namespace {
ur_result_t
Expand Down Expand Up @@ -41,7 +46,7 @@ commandHandleReleaseInternal(ur_exp_command_buffer_command_handle_t Command) {
ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() {
urQueueRelease(hInternalQueue);

cl_context CLContext = cl_adapter::cast<cl_context>(hContext);
cl_context CLContext = hContext->get();
cl_ext::clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR = nullptr;
cl_int Res =
cl_ext::getExtFuncFromContext<decltype(clReleaseCommandBufferKHR)>(
Expand All @@ -61,7 +66,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
ur_queue_handle_t Queue = nullptr;
UR_RETURN_ON_FAILURE(urQueueCreate(hContext, hDevice, nullptr, &Queue));

cl_context CLContext = cl_adapter::cast<cl_context>(hContext);
cl_context CLContext = hContext->get();
cl_ext::clCreateCommandBufferKHR_fn clCreateCommandBufferKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clCreateCommandBufferKHR)>(
Expand All @@ -72,7 +77,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
pCommandBufferDesc ? pCommandBufferDesc->isUpdatable : false;

ur_device_command_buffer_update_capability_flags_t UpdateCapabilities;
cl_device_id CLDevice = cl_adapter::cast<cl_device_id>(hDevice);
cl_device_id CLDevice = hDevice->get();
CL_RETURN_ON_FAILURE(
getDeviceCommandBufferUpdateCapabilities(CLDevice, UpdateCapabilities));
bool DeviceSupportsUpdate = UpdateCapabilities > 0;
Expand All @@ -86,16 +91,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
IsUpdatable ? CL_COMMAND_BUFFER_MUTABLE_KHR : 0u, 0};

cl_int Res = CL_SUCCESS;
auto CLCommandBuffer = clCreateCommandBufferKHR(
1, cl_adapter::cast<cl_command_queue *>(&Queue), Properties, &Res);
const cl_command_queue CLQueue = Queue->get();
auto CLCommandBuffer =
clCreateCommandBufferKHR(1, &CLQueue, Properties, &Res);
CL_RETURN_ON_FAILURE_AND_SET_NULL(Res, phCommandBuffer);

try {
auto URCommandBuffer = std::make_unique<ur_exp_command_buffer_handle_t_>(
Queue, hContext, CLCommandBuffer, IsUpdatable);
*phCommandBuffer = URCommandBuffer.release();
} catch (...) {
} catch (std::bad_alloc &) {
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
} catch (...) {
return UR_RESULT_ERROR_UNKNOWN;
}

CL_RETURN_ON_FAILURE(Res);
Expand Down Expand Up @@ -124,7 +132,7 @@ urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) {

UR_APIEXPORT ur_result_t UR_APICALL
urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->get();
cl_ext::clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clFinalizeCommandBufferKHR)>(
Expand All @@ -148,7 +156,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
ur_exp_command_buffer_sync_point_t *pSyncPoint,
ur_exp_command_buffer_command_handle_t *phCommandHandle) {

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->get();
cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clCommandNDRangeKernelKHR)>(
Expand All @@ -170,10 +178,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
cl_command_properties_khr *Properties =
hCommandBuffer->IsUpdatable ? UpdateProperties : nullptr;
CL_RETURN_ON_FAILURE(clCommandNDRangeKernelKHR(
hCommandBuffer->CLCommandBuffer, nullptr, Properties,
cl_adapter::cast<cl_kernel>(hKernel), workDim, pGlobalWorkOffset,
pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList,
pSyncPointWaitList, pSyncPoint, OutCommandHandle));
hCommandBuffer->CLCommandBuffer, nullptr, Properties, hKernel->get(),
workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize,
numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint,
OutCommandHandle));

try {
auto URCommandHandle =
Expand Down Expand Up @@ -221,18 +229,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp(
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
ur_exp_command_buffer_sync_point_t *pSyncPoint) {

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->get();
cl_ext::clCommandCopyBufferKHR_fn clCommandCopyBufferKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clCommandCopyBufferKHR)>(
CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferKHRCache,
cl_ext::CommandCopyBufferName, &clCommandCopyBufferKHR));

CL_RETURN_ON_FAILURE(clCommandCopyBufferKHR(
hCommandBuffer->CLCommandBuffer, nullptr, nullptr,
cl_adapter::cast<cl_mem>(hSrcMem), cl_adapter::cast<cl_mem>(hDstMem),
srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList,
pSyncPoint, nullptr));
hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hSrcMem->get(),
hDstMem->get(), srcOffset, dstOffset, size, numSyncPointsInWaitList,
pSyncPointWaitList, pSyncPoint, nullptr));

return UR_RESULT_SUCCESS;
}
Expand All @@ -255,19 +262,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp(
size_t OpenCLDstRect[3]{dstOrigin.x, dstOrigin.y, dstOrigin.z};
size_t OpenCLRegion[3]{region.width, region.height, region.depth};

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->get();
cl_ext::clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clCommandCopyBufferRectKHR)>(
CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferRectKHRCache,
cl_ext::CommandCopyBufferRectName, &clCommandCopyBufferRectKHR));

CL_RETURN_ON_FAILURE(clCommandCopyBufferRectKHR(
hCommandBuffer->CLCommandBuffer, nullptr, nullptr,
cl_adapter::cast<cl_mem>(hSrcMem), cl_adapter::cast<cl_mem>(hDstMem),
OpenCLOriginRect, OpenCLDstRect, OpenCLRegion, srcRowPitch, srcSlicePitch,
dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList,
pSyncPoint, nullptr));
hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hSrcMem->get(),
hDstMem->get(), OpenCLOriginRect, OpenCLDstRect, OpenCLRegion,
srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch,
numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr));

return UR_RESULT_SUCCESS;
}
Expand Down Expand Up @@ -339,17 +345,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp(
const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList,
ur_exp_command_buffer_sync_point_t *pSyncPoint) {

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->get();
cl_ext::clCommandFillBufferKHR_fn clCommandFillBufferKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clCommandFillBufferKHR)>(
CLContext, cl_ext::ExtFuncPtrCache->clCommandFillBufferKHRCache,
cl_ext::CommandFillBufferName, &clCommandFillBufferKHR));

CL_RETURN_ON_FAILURE(clCommandFillBufferKHR(
hCommandBuffer->CLCommandBuffer, nullptr, nullptr,
cl_adapter::cast<cl_mem>(hBuffer), pPattern, patternSize, offset, size,
numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr));
hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hBuffer->get(),
pPattern, patternSize, offset, size, numSyncPointsInWaitList,
pSyncPointWaitList, pSyncPoint, nullptr));

return UR_RESULT_SUCCESS;
}
Expand Down Expand Up @@ -393,21 +399,34 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
ur_event_handle_t *phEvent) {

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->get();
cl_ext::clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clEnqueueCommandBufferKHR)>(
CLContext, cl_ext::ExtFuncPtrCache->clEnqueueCommandBufferKHRCache,
cl_ext::EnqueueCommandBufferName, &clEnqueueCommandBufferKHR));

const uint32_t NumberOfQueues = 1;

cl_event Event;
std::vector<cl_event> CLWaitEvents(numEventsInWaitList);
for (uint32_t i = 0; i < numEventsInWaitList; i++) {
CLWaitEvents[i] = phEventWaitList[i]->get();
}
cl_command_queue CLQueue = hQueue->get();
CL_RETURN_ON_FAILURE(clEnqueueCommandBufferKHR(
NumberOfQueues, cl_adapter::cast<cl_command_queue *>(&hQueue),
hCommandBuffer->CLCommandBuffer, numEventsInWaitList,
cl_adapter::cast<const cl_event *>(phEventWaitList),
cl_adapter::cast<cl_event *>(phEvent)));

NumberOfQueues, &CLQueue, hCommandBuffer->CLCommandBuffer,
numEventsInWaitList, CLWaitEvents.data(), &Event));
if (phEvent) {
try {
auto UREvent =
std::make_unique<ur_event_handle_t_>(Event, hQueue->Context, hQueue);
*phEvent = UREvent.release();
} catch (std::bad_alloc &) {
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
} catch (...) {
return UR_RESULT_ERROR_UNKNOWN;
}
}
return UR_RESULT_SUCCESS;
}

Expand Down Expand Up @@ -460,11 +479,11 @@ void updateKernelArgs(std::vector<cl_mutable_dispatch_arg_khr> &CLArgs,
for (uint32_t i = 0; i < NumMemobjArgs; i++) {
const ur_exp_command_buffer_update_memobj_arg_desc_t &URMemObjArg =
ArgMemobjList[i];
cl_mem arg_value = URMemObjArg.hNewMemObjArg->get();
cl_mutable_dispatch_arg_khr CLArg{
URMemObjArg.argIndex, // arg_index
sizeof(cl_mem), // arg_size
cl_adapter::cast<const cl_mem *>(
&URMemObjArg.hNewMemObjArg) // arg_value
&arg_value // arg_value
};

CLArgs.push_back(CLArg);
Expand Down Expand Up @@ -495,7 +514,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
}

ur_exp_command_buffer_handle_t hCommandBuffer = hCommand->hCommandBuffer;
cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->get();

cl_ext::clUpdateMutableCommandsKHR_fn clUpdateMutableCommandsKHR = nullptr;
UR_RETURN_ON_FAILURE(
Expand Down Expand Up @@ -545,8 +564,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
updateNDRange(CLLocalWorkSize, LocalWorkSizePtr);
}

cl_mutable_command_khr command =
cl_adapter::cast<cl_mutable_command_khr>(hCommand->CLMutableCommand);
cl_mutable_command_khr command = hCommand->CLMutableCommand;
cl_mutable_dispatch_config_khr dispatch_config = {
command,
static_cast<cl_uint>(CLArgs.size()), // num_args
Expand Down
14 changes: 0 additions & 14 deletions source/adapters/opencl/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,20 +158,6 @@ extern thread_local char ErrorMessage[MaxMessageSize];
ur_result_t ErrorCode);

[[noreturn]] void die(const char *Message);

template <class To, class From> To cast(From Value) {

if constexpr (std::is_pointer_v<From>) {
static_assert(std::is_pointer_v<From> == std::is_pointer_v<To>,
"Cast failed pointer check");
return reinterpret_cast<To>(Value);
} else {
static_assert(sizeof(From) == sizeof(To), "Cast failed size check");
static_assert(std::is_signed_v<From> == std::is_signed_v<To>,
"Cast failed sign check");
return static_cast<To>(Value);
}
}
} // namespace cl_adapter

namespace cl_ext {
Expand Down
Loading
Loading