Skip to content

[SYCL][CUDA] Port CUDA plugin to Unified Runtime #9512

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 45 commits into from
Jun 14, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
da70781
[SYCL][CUDA] Export loader interface for CUDA UR adapter
Apr 4, 2023
46dca60
[SYCL][PI][UR][CUDA] Port CUDA platform, device, context to Unified R…
callumfare Apr 6, 2023
8073f6c
Port program and kernel entry points
omarahmed1111 Apr 10, 2023
506130e
Add UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE to kernel group info
omarahmed1111 Apr 13, 2023
625f1f8
[SYCL][PI][UR][CUDA] Port a few miscellaneous CUDA entry points to UR
callumfare Apr 14, 2023
3ae2329
[SYCL][PI][UR][CUDA] Port CUDA queue and event to Unified Runtime
callumfare Apr 7, 2023
103cec3
AAdd program and kernel ddi tables
omarahmed1111 Apr 18, 2023
c640339
[SYCL][PI][UR][CUDA] Port piEnqueueKernelLaunch to UR
callumfare Apr 17, 2023
8c63247
[SYCL][CUDA][UR] Add missing queue/event entry points to DDI table
callumfare Apr 19, 2023
76d4c5f
[SYCL][CUDA] Remove unused function from pi_cuda
callumfare Apr 19, 2023
3742495
[SYCL][CUDA] Add missing UR_APICALL, UR_APIEXPORT to entry points
callumfare Apr 19, 2023
7e0f0ec
Small fixes
Apr 19, 2023
17f91fc
[SYCL][PI][UR][CUDA] Port CUDA sampler to UR
callumfare Apr 18, 2023
6489ce1
[SYCL][CUDA] Fix missing input validation for various queue entry points
callumfare Apr 21, 2023
ebe90a2
Refactor memory object and entry points into new memory.hpp/cpp files…
martygrant Apr 21, 2023
ef9f224
Port USM entry points
omarahmed1111 Apr 18, 2023
d185543
[UR][CUDA][SYCL] Fix sycl-e2e tests
Apr 27, 2023
398f3e9
[UR][CUDA] Port urEnqueueRead/Write & setArgMemObj
Apr 26, 2023
e02d3d3
Port piextKernelSetArgSampler
omarahmed1111 Apr 28, 2023
816652a
[UR][SYCL][CUDA] Point PI to correct entry point
Apr 28, 2023
6d648f6
Port remaining queue entry-points
omarahmed1111 Apr 27, 2023
4f9277b
Don't check MAX_MEM_ALLOC_SIZE when creating a buffer
callumfare May 2, 2023
8968c1f
[SYCL][CUDA] Port CUDA global variable read/write to UR
callumfare Apr 28, 2023
1fb2afd
[SYCL][CUDA] Only build CUDA UR adapter when CUDA plugin is enabled
callumfare May 3, 2023
96c85ac
[SYCL][CUDA] Don't link non-CUDA adapters with cudadrv
callumfare May 3, 2023
3812978
[SYCL][CUDA] Port piextGetDeviceFunctionPointer and piextDeviceSelect…
callumfare May 4, 2023
764e683
[SYCL][CUDA] Port piPluginGetBackendOption to UR
callumfare May 5, 2023
d98adf8
[SYCL][CUDA] Port read/write host pipe to UR
callumfare May 5, 2023
797d3f7
[CUDA][UR]Fix program_info_kernel_names
omarahmed1111 May 15, 2023
6f68c7c
[SYCL][CUDA] Remove unused code from CUDA PI and move remaining docum…
callumfare May 9, 2023
ff2559f
[SYCL][CUDA] Add a few extra checks to the cuda UR program implementa…
aarongreig May 23, 2023
d69f029
[SYCL][CUDA] Implement UR_DEVICE_INFO_IL_VERSION query for cuda.
aarongreig May 23, 2023
190f3c7
[SYCL][CUDA][UR] Remove queue backward compatability apis
omarahmed1111 May 24, 2023
a441503
[SYCL][CUDA][UR] Add usmPool entry points to ddi tables and fix ur*na…
omarahmed1111 May 25, 2023
3b65369
Fix CUDA adapter formatting
callumfare May 31, 2023
0011b91
Mark KernelFusion/sync_two_queues_event_dep as unsupported on cuda pe…
callumfare May 31, 2023
9e97af7
[SYCL][CUDA] Fix assumption about work dimensions in EnqueueKernelLau…
aarongreig May 24, 2023
b538dd8
[SYCL][CUDA] Correct return type of cuda USM capability queries.
aarongreig May 29, 2023
9811f9b
[SYCL][CUDA] A number of small cuda adapter fixes for cts/spec compli…
aarongreig Jun 5, 2023
fce479c
[SYCL][UR] Avoid zero-length new in pi2ur.
aarongreig Jun 6, 2023
9b3448a
[SYCL][CUDA] Mass fixup of code style in the CUDA adapter
callumfare Jun 8, 2023
a0de2d7
[SYCL][CUDA][PI][UR] Fix PR review comments
Jun 8, 2023
2a50972
[SYCL][CUDA] Tidy CMakeLists.txt
callumfare Jun 9, 2023
c39e794
Fix various build warnings
callumfare Jun 12, 2023
b64fcbd
Address more review feedback
callumfare Jun 14, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
[SYCL][PI][UR][CUDA] Port CUDA sampler to UR
  • Loading branch information
callumfare committed Jun 14, 2023
commit 17f91fc331e90b29065db3b2c4c7f5d170bb9ab3
2 changes: 2 additions & 0 deletions sycl/plugins/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ add_sycl_plugin(cuda
"../unified_runtime/ur/adapters/cuda/kernel.hpp"
"../unified_runtime/ur/adapters/cuda/queue.hpp"
"../unified_runtime/ur/adapters/cuda/queue.cpp"
"../unified_runtime/ur/adapters/cuda/sampler.cpp"
"../unified_runtime/ur/adapters/cuda/sampler.hpp"
"../unified_runtime/ur/adapters/cuda/ur_interface_loader.cpp"
"../unified_runtime/ur/adapters/cuda/tracing.cpp"
# ---
Expand Down
146 changes: 4 additions & 142 deletions sycl/plugins/cuda/pi_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -990,144 +990,6 @@ pi_result cuda_piMemRetain(pi_mem mem) {
return PI_SUCCESS;
}

/// Creates a PI sampler object
///
/// \param[in] context The context the sampler is created for.
/// \param[in] sampler_properties The properties for the sampler.
/// \param[out] result_sampler Set to the resulting sampler object.
///
/// \return PI_SUCCESS on success. PI_ERROR_INVALID_VALUE if given an invalid
/// property
/// or if there is multiple of properties from the same category.
pi_result cuda_piSamplerCreate(pi_context context,
const pi_sampler_properties *sampler_properties,
pi_sampler *result_sampler) {
std::unique_ptr<_pi_sampler> retImplSampl{new _pi_sampler(context)};

bool propSeen[3] = {false, false, false};
for (size_t i = 0; sampler_properties[i] != 0; i += 2) {
switch (sampler_properties[i]) {
case PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS:
if (propSeen[0]) {
return PI_ERROR_INVALID_VALUE;
}
propSeen[0] = true;
retImplSampl->props_ |= sampler_properties[i + 1];
break;
case PI_SAMPLER_PROPERTIES_FILTER_MODE:
if (propSeen[1]) {
return PI_ERROR_INVALID_VALUE;
}
propSeen[1] = true;
retImplSampl->props_ |=
(sampler_properties[i + 1] - PI_SAMPLER_FILTER_MODE_NEAREST) << 1;
break;
case PI_SAMPLER_PROPERTIES_ADDRESSING_MODE:
if (propSeen[2]) {
return PI_ERROR_INVALID_VALUE;
}
propSeen[2] = true;
retImplSampl->props_ |=
(sampler_properties[i + 1] - PI_SAMPLER_ADDRESSING_MODE_NONE) << 2;
break;
default:
return PI_ERROR_INVALID_VALUE;
}
}

if (!propSeen[0]) {
retImplSampl->props_ |= PI_TRUE;
}
// Default filter mode to PI_SAMPLER_FILTER_MODE_NEAREST
if (!propSeen[2]) {
retImplSampl->props_ |=
(PI_SAMPLER_ADDRESSING_MODE_CLAMP % PI_SAMPLER_ADDRESSING_MODE_NONE)
<< 2;
}

*result_sampler = retImplSampl.release();
return PI_SUCCESS;
}

/// Gets information from a PI sampler object
///
/// \param[in] sampler The sampler to get the information from.
/// \param[in] param_name The name of the information to get.
/// \param[in] param_value_size The size of the param_value.
/// \param[out] param_value Set to information value.
/// \param[out] param_value_size_ret Set to the size of the information value.
///
/// \return PI_SUCCESS on success.
pi_result cuda_piSamplerGetInfo(pi_sampler sampler, pi_sampler_info param_name,
size_t param_value_size, void *param_value,
size_t *param_value_size_ret) {
assert(sampler != nullptr);

switch (param_name) {
case PI_SAMPLER_INFO_REFERENCE_COUNT:
return getInfo(param_value_size, param_value, param_value_size_ret,
sampler->get_reference_count());
case PI_SAMPLER_INFO_CONTEXT:
return getInfo(param_value_size, param_value, param_value_size_ret,
sampler->context_);
case PI_SAMPLER_INFO_NORMALIZED_COORDS: {
pi_bool norm_coords_prop = static_cast<pi_bool>(sampler->props_ & 0x1);
return getInfo(param_value_size, param_value, param_value_size_ret,
norm_coords_prop);
}
case PI_SAMPLER_INFO_FILTER_MODE: {
pi_sampler_filter_mode filter_prop = static_cast<pi_sampler_filter_mode>(
((sampler->props_ >> 1) & 0x1) + PI_SAMPLER_FILTER_MODE_NEAREST);
return getInfo(param_value_size, param_value, param_value_size_ret,
filter_prop);
}
case PI_SAMPLER_INFO_ADDRESSING_MODE: {
pi_sampler_addressing_mode addressing_prop =
static_cast<pi_sampler_addressing_mode>(
(sampler->props_ >> 2) + PI_SAMPLER_ADDRESSING_MODE_NONE);
return getInfo(param_value_size, param_value, param_value_size_ret,
addressing_prop);
}
default:
__SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name);
}
return {};
}

/// Retains a PI sampler object, incrementing its reference count.
///
/// \param[in] sampler The sampler to increment the reference count of.
///
/// \return PI_SUCCESS.
pi_result cuda_piSamplerRetain(pi_sampler sampler) {
assert(sampler != nullptr);
sampler->increment_reference_count();
return PI_SUCCESS;
}

/// Releases a PI sampler object, decrementing its reference count. If the
/// reference count reaches zero, the sampler object is destroyed.
///
/// \param[in] sampler The sampler to decrement the reference count of.
///
/// \return PI_SUCCESS.
pi_result cuda_piSamplerRelease(pi_sampler sampler) {
assert(sampler != nullptr);

// double delete or someone is messing with the ref count.
// either way, cannot safely proceed.
sycl::detail::pi::assertion(
sampler->get_reference_count() != 0,
"Reference count overflow detected in cuda_piSamplerRelease.");

// decrement ref count. If it is 0, delete the sampler.
if (sampler->decrement_reference_count() == 0) {
delete sampler;
}

return PI_SUCCESS;
}

/// General 3D memory copy operation.
/// This function requires the corresponding CUDA context to be at the top of
/// the context stack
Expand Down Expand Up @@ -2667,10 +2529,10 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
_PI_CL(piextEventCreateWithNativeHandle,
pi2ur::piextEventCreateWithNativeHandle)
// Sampler
_PI_CL(piSamplerCreate, cuda_piSamplerCreate)
_PI_CL(piSamplerGetInfo, cuda_piSamplerGetInfo)
_PI_CL(piSamplerRetain, cuda_piSamplerRetain)
_PI_CL(piSamplerRelease, cuda_piSamplerRelease)
_PI_CL(piSamplerCreate, pi2ur::piSamplerCreate)
_PI_CL(piSamplerGetInfo, pi2ur::piSamplerGetInfo)
_PI_CL(piSamplerRetain, pi2ur::piSamplerRetain)
_PI_CL(piSamplerRelease, pi2ur::piSamplerRelease)
// Queue commands
_PI_CL(piEnqueueKernelLaunch, pi2ur::piEnqueueKernelLaunch)
_PI_CL(piEnqueueNativeKernel, cuda_piEnqueueNativeKernel)
Expand Down
16 changes: 3 additions & 13 deletions sycl/plugins/cuda/pi_cuda.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
#include <ur/adapters/cuda/program.hpp>
#include <ur/adapters/cuda/event.hpp>
#include <ur/adapters/cuda/queue.hpp>
#include <ur/adapters/cuda/sampler.hpp>

// Share code between the PI Plugin and UR Adapter
#include <pi2ur.hpp>
Expand Down Expand Up @@ -406,19 +407,8 @@ struct _pi_kernel : ur_kernel_handle_t_ {
/// Sampler property layout:
/// | 31 30 ... 6 5 | 4 3 2 | 1 | 0 |
/// | N/A | addressing mode | fiter mode | normalize coords |
struct _pi_sampler {
std::atomic_uint32_t refCount_;
pi_uint32 props_;
pi_context context_;

_pi_sampler(pi_context context)
: refCount_(1), props_(0), context_(context) {}

pi_uint32 increment_reference_count() noexcept { return ++refCount_; }

pi_uint32 decrement_reference_count() noexcept { return --refCount_; }

pi_uint32 get_reference_count() const noexcept { return refCount_; }
struct _pi_sampler : ur_sampler_handle_t_ {
using ur_sampler_handle_t_::ur_sampler_handle_t_;
};

// -------------------------------------------------------------
Expand Down
2 changes: 2 additions & 0 deletions sycl/plugins/unified_runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,8 @@ add_sycl_library("ur_adapter_cuda" SHARED
"ur/adapters/cuda/kernel.hpp"
"ur/adapters/cuda/queue.cpp"
"ur/adapters/cuda/queue.hpp"
"ur/adapters/cuda/sampler.cpp"
"ur/adapters/cuda/sampler.hpp"
"ur/adapters/cuda/ur_interface_loader.cpp"
"ur/adapters/cuda/tracing.cpp"
INCLUDE_DIRS
Expand Down
84 changes: 84 additions & 0 deletions sycl/plugins/unified_runtime/ur/adapters/cuda/sampler.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
//===--------- sampler.cpp - CUDA Adapter ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===-----------------------------------------------------------------===//

#include "sampler.hpp"
#include "common.hpp"

ur_result_t urSamplerCreate(ur_context_handle_t hContext,
const ur_sampler_desc_t *pDesc,
ur_sampler_handle_t *phSampler) {
std::unique_ptr<ur_sampler_handle_t_> retImplSampl{
new ur_sampler_handle_t_(hContext)};

if (pDesc && pDesc->stype == UR_STRUCTURE_TYPE_SAMPLER_DESC) {
retImplSampl->props_ |= pDesc->normalizedCoords;
retImplSampl->props_ |= (pDesc->filterMode << 1);
retImplSampl->props_ |= (pDesc->addressingMode << 2);
} else {
// Set default values
retImplSampl->props_ |= true; // Normalized Coords
retImplSampl->props_ |= UR_SAMPLER_ADDRESSING_MODE_CLAMP << 2;
}

*phSampler = retImplSampl.release();
return UR_RESULT_SUCCESS;
}

ur_result_t urSamplerGetInfo(ur_sampler_handle_t hSampler,
ur_sampler_info_t propName, size_t propValueSize,
void *pPropValue, size_t *pPropSizeRet) {
UR_ASSERT(hSampler, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
UrReturnHelper ReturnValue(propValueSize, pPropValue, pPropSizeRet);

switch (propName) {
case UR_SAMPLER_INFO_REFERENCE_COUNT:
return ReturnValue(hSampler->get_reference_count());
case UR_SAMPLER_INFO_CONTEXT:
return ReturnValue(hSampler->context_);
case UR_SAMPLER_INFO_NORMALIZED_COORDS: {
bool norm_coords_prop = static_cast<bool>(hSampler->props_);
return ReturnValue(norm_coords_prop);
}
case UR_SAMPLER_INFO_FILTER_MODE: {
auto filter_prop =
static_cast<ur_sampler_filter_mode_t>(((hSampler->props_ >> 1) & 0x1));
return ReturnValue(filter_prop);
}
case UR_SAMPLER_INFO_ADDRESSING_MODE: {
auto addressing_prop =
static_cast<ur_sampler_addressing_mode_t>(hSampler->props_ >> 2);
return ReturnValue(addressing_prop);
}
default:
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
}
return {};
}

ur_result_t urSamplerRetain(ur_sampler_handle_t hSampler) {
UR_ASSERT(hSampler, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
hSampler->increment_reference_count();
return UR_RESULT_SUCCESS;
}

ur_result_t urSamplerRelease(ur_sampler_handle_t hSampler) {
UR_ASSERT(hSampler, UR_RESULT_ERROR_INVALID_NULL_HANDLE);

// double delete or someone is messing with the ref count.
// either way, cannot safely proceed.
sycl::detail::ur::assertion(
hSampler->get_reference_count() != 0,
"Reference count overflow detected in urSamplerRelease.");

// decrement ref count. If it is 0, delete the sampler.
if (hSampler->decrement_reference_count() == 0) {
delete hSampler;
}

return UR_RESULT_SUCCESS;
}
29 changes: 29 additions & 0 deletions sycl/plugins/unified_runtime/ur/adapters/cuda/sampler.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
//===--------- sampler.hpp - CUDA Adapter ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===-----------------------------------------------------------------===//

#include <ur/ur.hpp>

/// Implementation of samplers for CUDA
///
/// Sampler property layout:
/// | 31 30 ... 6 5 | 4 3 2 | 1 | 0 |
/// | N/A | addressing mode | fiter mode | normalize coords |
struct ur_sampler_handle_t_ {
std::atomic_uint32_t refCount_;
uint32_t props_;
ur_context_handle_t context_;

ur_sampler_handle_t_(ur_context_handle_t context)
: refCount_(1), props_(0), context_(context) {}

uint32_t increment_reference_count() noexcept { return ++refCount_; }

uint32_t decrement_reference_count() noexcept { return --refCount_; }

uint32_t get_reference_count() const noexcept { return refCount_; }
};
Original file line number Diff line number Diff line change
Expand Up @@ -130,12 +130,12 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable(
if (UR_RESULT_SUCCESS != result) {
return result;
}
pDdiTable->pfnCreate = nullptr;
pDdiTable->pfnCreate = urSamplerCreate;
pDdiTable->pfnCreateWithNativeHandle = nullptr;
pDdiTable->pfnGetInfo = nullptr;
pDdiTable->pfnGetInfo = urSamplerGetInfo;
pDdiTable->pfnGetNativeHandle = nullptr;
pDdiTable->pfnRelease = nullptr;
pDdiTable->pfnRetain = nullptr;
pDdiTable->pfnRelease = urSamplerRelease;
pDdiTable->pfnRetain = urSamplerRetain;
return UR_RESULT_SUCCESS;
}

Expand Down