Skip to content

[SYCL][CUDA] Add experimental context and device interop #6202

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Jun 10, 2022
Merged
4 changes: 4 additions & 0 deletions sycl/include/CL/sycl/backend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,12 @@
#include <CL/sycl/detail/backend_traits_opencl.hpp>
#endif
#if SYCL_EXT_ONEAPI_BACKEND_CUDA
#ifdef SYCL_EXT_ONEAPI_BACKEND_CUDA_EXPERIMENTAL
#include <sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp>
#else
#include <CL/sycl/detail/backend_traits_cuda.hpp>
#endif
#endif
#if SYCL_EXT_ONEAPI_BACKEND_HIP
#include <CL/sycl/detail/backend_traits_hip.hpp>
#endif
Expand Down
6 changes: 6 additions & 0 deletions sycl/include/CL/sycl/context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ class platform;
namespace detail {
class context_impl;
}
template <backend Backend, class SyclT>
auto get_native(const SyclT &Obj) -> backend_return_t<Backend, SyclT>;

/// The context class represents a SYCL context on which kernel functions may
/// be executed.
Expand Down Expand Up @@ -230,6 +232,10 @@ class __SYCL_EXPORT context {
pi_native_handle getNative() const;

std::shared_ptr<detail::context_impl> impl;

template <backend Backend, class SyclT>
friend auto get_native(const SyclT &Obj) -> backend_return_t<Backend, SyclT>;

template <class Obj>
friend decltype(Obj::impl) detail::getSyclObjImpl(const Obj &SyclObject);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
//===------- backend_traits_cuda.hpp - Backend traits for CUDA ---*-C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the specializations of the sycl::detail::interop,
// sycl::detail::BackendInput and sycl::detail::BackendReturn class templates
// for the CUDA backend but there is no sycl::detail::InteropFeatureSupportMap
// specialization for the CUDA backend.
//===----------------------------------------------------------------------===//

#pragma once

#include <CL/sycl/accessor.hpp>
#include <CL/sycl/context.hpp>
#include <CL/sycl/detail/backend_traits.hpp>
#include <CL/sycl/device.hpp>
#include <CL/sycl/event.hpp>
#include <CL/sycl/kernel_bundle.hpp>
#include <CL/sycl/queue.hpp>

#include <vector>

typedef int CUdevice;
typedef struct CUctx_st *CUcontext;
typedef struct CUstream_st *CUstream;
typedef struct CUevent_st *CUevent;
typedef struct CUmod_st *CUmodule;

__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
namespace detail {

// TODO the interops for context, device, event, platform and program
// may be removed after removing the deprecated 'get_native()' methods
// from the corresponding classes. The interop<backend, queue> specialization
// is also used in the get_queue() method of the deprecated class
// interop_handler and also can be removed after API cleanup.
template <> struct interop<backend::ext_oneapi_cuda, context> {
using type = CUcontext;
};

template <> struct interop<backend::ext_oneapi_cuda, device> {
using type = CUdevice;
};

template <> struct interop<backend::ext_oneapi_cuda, event> {
using type = CUevent;
};

template <> struct interop<backend::ext_oneapi_cuda, queue> {
using type = CUstream;
};

template <> struct interop<backend::ext_oneapi_cuda, platform> {
using type = std::vector<CUdevice>;
};

#ifdef __SYCL_INTERNAL_API
template <> struct interop<backend::ext_oneapi_cuda, program> {
using type = CUmodule;
};
#endif

template <typename DataT, int Dimensions, typename AllocatorT>
struct BackendInput<backend::ext_oneapi_cuda,
buffer<DataT, Dimensions, AllocatorT>> {
using type = DataT *;
};

template <typename DataT, int Dimensions, typename AllocatorT>
struct BackendReturn<backend::ext_oneapi_cuda,
buffer<DataT, Dimensions, AllocatorT>> {
using type = DataT *;
};

template <> struct BackendInput<backend::ext_oneapi_cuda, context> {
using type = CUcontext;
};

template <> struct BackendReturn<backend::ext_oneapi_cuda, context> {
using type = std::vector<CUcontext>;
};

template <> struct BackendInput<backend::ext_oneapi_cuda, device> {
using type = CUdevice;
};

template <> struct BackendReturn<backend::ext_oneapi_cuda, device> {
using type = CUdevice;
};

template <> struct BackendInput<backend::ext_oneapi_cuda, event> {
using type = CUevent;
};

template <> struct BackendReturn<backend::ext_oneapi_cuda, event> {
using type = CUevent;
};

template <> struct BackendInput<backend::ext_oneapi_cuda, queue> {
using type = CUstream;
};

template <> struct BackendReturn<backend::ext_oneapi_cuda, queue> {
using type = CUstream;
};

template <> struct BackendInput<backend::ext_oneapi_cuda, platform> {
using type = std::vector<CUdevice>;
};

template <> struct BackendReturn<backend::ext_oneapi_cuda, platform> {
using type = std::vector<CUdevice>;
};

#ifdef __SYCL_INTERNAL_API
template <> struct BackendInput<backend::ext_oneapi_cuda, program> {
using type = CUmodule;
};

template <> struct BackendReturn<backend::ext_oneapi_cuda, program> {
using type = CUmodule;
};
#endif

template <> struct InteropFeatureSupportMap<backend::ext_oneapi_cuda> {
static constexpr bool MakePlatform = false;
static constexpr bool MakeDevice = true;
static constexpr bool MakeContext = true;
static constexpr bool MakeQueue = false;
static constexpr bool MakeEvent = false;
static constexpr bool MakeBuffer = false;
static constexpr bool MakeKernel = false;
static constexpr bool MakeKernelBundle = false;
};

} // namespace detail
} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)
75 changes: 75 additions & 0 deletions sycl/include/sycl/ext/oneapi/experimental/backend/cuda.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
//==--------- cuda.hpp - SYCL CUDA backend ---------------------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include <CL/sycl/backend.hpp>
#include <CL/sycl/context.hpp>

#include <vector>

__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
namespace ext {
namespace oneapi {
namespace cuda {

// Implementation of ext_oneapi_cuda::make<device>
inline __SYCL_EXPORT device make_device(pi_native_handle NativeHandle) {
return sycl::detail::make_device(NativeHandle, backend::ext_oneapi_cuda);
}

} // namespace cuda
} // namespace oneapi
} // namespace ext

// CUDA context specialization
template <>
inline auto get_native<backend::ext_oneapi_cuda, context>(const context &C)
-> backend_return_t<backend::ext_oneapi_cuda, context> {
// create a vector to be returned
backend_return_t<backend::ext_oneapi_cuda, context> ret;

// get the native CUDA context from the SYCL object
auto native = reinterpret_cast<
backend_return_t<backend::ext_oneapi_cuda, context>::value_type>(
C.getNative());
ret.push_back(native);

return ret;
}

// Specialisation of non-free context get_native
template <>
inline backend_return_t<backend::ext_oneapi_cuda, context>
context::get_native<backend::ext_oneapi_cuda>() const {
return sycl::get_native<backend::ext_oneapi_cuda, context>(*this);
}

// Specialisation of interop_handles get_native_context
template <>
inline backend_return_t<backend::ext_oneapi_cuda, context>
interop_handle::get_native_context<backend::ext_oneapi_cuda>() const {
#ifndef __SYCL_DEVICE_ONLY__
return std::vector{reinterpret_cast<CUcontext>(getNativeContext())};
#else
// we believe this won't be ever called on device side
return {};
#endif
}

// CUDA device specialization
template <>
inline device make_device<backend::ext_oneapi_cuda>(
const backend_input_t<backend::ext_oneapi_cuda, device> &BackendObject) {
pi_native_handle NativeHandle = static_cast<pi_native_handle>(BackendObject);
return ext::oneapi::cuda::make_device(NativeHandle);
}

} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)
96 changes: 82 additions & 14 deletions sycl/plugins/cuda/pi_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,15 @@ class ScopedContext {
throw PI_INVALID_CONTEXT;
}

CUcontext desired = ctxt->get();
set_context(ctxt->get());
}

ScopedContext(CUcontext ctxt) { set_context(ctxt); }

~ScopedContext() {}

private:
void set_context(CUcontext desired) {
CUcontext original = nullptr;

PI_CHECK_ERROR(cuCtxGetCurrent(&original));
Expand All @@ -195,8 +203,6 @@ class ScopedContext {
PI_CHECK_ERROR(cuCtxSetCurrent(desired));
}
}

~ScopedContext() {}
};

/// \cond NODOXY
Expand Down Expand Up @@ -1856,11 +1862,49 @@ pi_result cuda_piextDeviceGetNativeHandle(pi_device device,
/// \param[out] device Set to the PI device object created from native handle.
///
/// \return TBD
pi_result cuda_piextDeviceCreateWithNativeHandle(pi_native_handle, pi_platform,
pi_device *) {
cl::sycl::detail::pi::die(
"Creation of PI device from native handle not implemented");
return {};
pi_result cuda_piextDeviceCreateWithNativeHandle(pi_native_handle nativeHandle,
pi_platform platform,
pi_device *piDevice) {
assert(piDevice != nullptr);

// If a platform is provided just check if the device is in it
if (platform) {
bool found_match = false;
for (auto &dev : platform->devices_) {
if (dev->get() == static_cast<CUdevice>(nativeHandle)) {
*piDevice = dev.get();
found_match = true;
}
}
if (!found_match)
return PI_INVALID_VALUE;
return PI_SUCCESS;
}

// Get list of platforms
pi_uint32 num_platforms;
pi_result result = cuda_piPlatformsGet(0, nullptr, &num_platforms);

pi_platform *plat =
static_cast<pi_platform *>(malloc(num_platforms * sizeof(pi_platform)));
result = cuda_piPlatformsGet(num_platforms, plat, nullptr);

// Iterate through platforms to find device that matches nativeHandle
bool found_match = false;
for (pi_uint32 j = 0; j < num_platforms; ++j) {
for (auto &dev : plat[j]->devices_) {
if (dev->get() == static_cast<CUdevice>(nativeHandle)) {
*piDevice = dev.get();
found_match = true;
}
}
}

// If the provided nativeHandle cannot be matched to an
// existing device return error
if (!found_match)
return PI_INVALID_VALUE;
return result;
}

/* Context APIs */
Expand Down Expand Up @@ -2025,12 +2069,36 @@ pi_result cuda_piextContextGetNativeHandle(pi_context context,
/// \param[out] context Set to the PI context object created from native handle.
///
/// \return TBD
pi_result cuda_piextContextCreateWithNativeHandle(pi_native_handle, pi_uint32,
const pi_device *, bool,
pi_context *) {
cl::sycl::detail::pi::die(
"Creation of PI context from native handle not implemented");
return {};
pi_result cuda_piextContextCreateWithNativeHandle(pi_native_handle nativeHandle,
pi_uint32 num_devices,
const pi_device *devices,
bool ownNativeHandle,
pi_context *piContext) {
(void)num_devices;
(void)devices;
(void)ownNativeHandle;
assert(piContext != nullptr);
assert(ownNativeHandle == false);

CUcontext newContext = reinterpret_cast<CUcontext>(nativeHandle);

ScopedContext active(newContext);

// Get context's native device
CUdevice cu_device;
pi_result retErr = PI_CHECK_ERROR(cuCtxGetDevice(&cu_device));

// Create a SYCL device from the ctx device
pi_device device = nullptr;
retErr = cuda_piextDeviceCreateWithNativeHandle(cu_device, nullptr, &device);

// Create sycl context
*piContext =
new _pi_context{_pi_context::kind::user_defined, newContext, device};

cuda_piContextRetain(*piContext);

return retErr;
}

/// Creates a PI Memory object using a CUDA memory allocation.
Expand Down
2 changes: 2 additions & 0 deletions sycl/source/backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ static const plugin &getPlugin(backend Backend) {
return pi::getPlugin<backend::opencl>();
case backend::ext_oneapi_level_zero:
return pi::getPlugin<backend::ext_oneapi_level_zero>();
case backend::ext_oneapi_cuda:
return pi::getPlugin<backend::ext_oneapi_cuda>();
default:
throw sycl::runtime_error{"Unsupported backend", PI_INVALID_OPERATION};
}
Expand Down
1 change: 1 addition & 0 deletions sycl/source/detail/pi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,7 @@ template __SYCL_EXPORT const plugin &
getPlugin<backend::ext_oneapi_level_zero>();
template __SYCL_EXPORT const plugin &
getPlugin<backend::ext_intel_esimd_emulator>();
template __SYCL_EXPORT const plugin &getPlugin<backend::ext_oneapi_cuda>();

// Report error and no return (keeps compiler from printing warnings).
// TODO: Probably change that to throw a catchable exception,
Expand Down
Loading