intel · pvchupin · Jun 10, 2022 · Feb 10, 2022 · Feb 17, 2022 · May 26, 2022
@@ -18,8 +18,12 @@
 #include <CL/sycl/detail/backend_traits_opencl.hpp>
 #endif
 #if SYCL_EXT_ONEAPI_BACKEND_CUDA
+#ifdef SYCL_EXT_ONEAPI_BACKEND_CUDA_EXPERIMENTAL
+#include <sycl/ext/oneapi/experimental/backend/backend_traits_cuda.hpp>
+#else
 #include <CL/sycl/detail/backend_traits_cuda.hpp>
 #endif
+#endif
 #if SYCL_EXT_ONEAPI_BACKEND_HIP
 #include <CL/sycl/detail/backend_traits_hip.hpp>
 #endif

@@ -27,6 +27,8 @@ class platform;
 namespace detail {
 class context_impl;
 }
+template <backend Backend, class SyclT>
+auto get_native(const SyclT &Obj) -> backend_return_t<Backend, SyclT>;
 
 /// The context class represents a SYCL context on which kernel functions may
 /// be executed.
@@ -230,6 +232,10 @@ class __SYCL_EXPORT context {
   pi_native_handle getNative() const;
 
   std::shared_ptr<detail::context_impl> impl;
+
+  template <backend Backend, class SyclT>
+  friend auto get_native(const SyclT &Obj) -> backend_return_t<Backend, SyclT>;
+
   template <class Obj>
   friend decltype(Obj::impl) detail::getSyclObjImpl(const Obj &SyclObject);
 

@@ -0,0 +1,143 @@
+//===------- backend_traits_cuda.hpp - Backend traits for CUDA ---*-C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the specializations of the sycl::detail::interop,
+// sycl::detail::BackendInput and sycl::detail::BackendReturn class templates
+// for the CUDA backend but there is no sycl::detail::InteropFeatureSupportMap
+// specialization for the CUDA backend.
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <CL/sycl/accessor.hpp>
+#include <CL/sycl/context.hpp>
+#include <CL/sycl/detail/backend_traits.hpp>
+#include <CL/sycl/device.hpp>
+#include <CL/sycl/event.hpp>
+#include <CL/sycl/kernel_bundle.hpp>
+#include <CL/sycl/queue.hpp>
+
+#include <vector>
+
+typedef int CUdevice;
+typedef struct CUctx_st *CUcontext;
+typedef struct CUstream_st *CUstream;
+typedef struct CUevent_st *CUevent;
+typedef struct CUmod_st *CUmodule;
+
+__SYCL_INLINE_NAMESPACE(cl) {
+namespace sycl {
+namespace detail {
+
+// TODO the interops for context, device, event, platform and program
+// may be removed after removing the deprecated 'get_native()' methods
+// from the corresponding classes. The interop<backend, queue> specialization
+// is also used in the get_queue() method of the deprecated class
+// interop_handler and also can be removed after API cleanup.
+template <> struct interop<backend::ext_oneapi_cuda, context> {
+  using type = CUcontext;
+};
+
+template <> struct interop<backend::ext_oneapi_cuda, device> {
+  using type = CUdevice;
+};
+
+template <> struct interop<backend::ext_oneapi_cuda, event> {
+  using type = CUevent;
+};
+
+template <> struct interop<backend::ext_oneapi_cuda, queue> {
+  using type = CUstream;
+};
+
+template <> struct interop<backend::ext_oneapi_cuda, platform> {
+  using type = std::vector<CUdevice>;
+};
+
+#ifdef __SYCL_INTERNAL_API
+template <> struct interop<backend::ext_oneapi_cuda, program> {
+  using type = CUmodule;
+};
+#endif
+
+template <typename DataT, int Dimensions, typename AllocatorT>
+struct BackendInput<backend::ext_oneapi_cuda,
+                    buffer<DataT, Dimensions, AllocatorT>> {
+  using type = DataT *;
+};
+
+template <typename DataT, int Dimensions, typename AllocatorT>
+struct BackendReturn<backend::ext_oneapi_cuda,
+                     buffer<DataT, Dimensions, AllocatorT>> {
+  using type = DataT *;
+};
+
+template <> struct BackendInput<backend::ext_oneapi_cuda, context> {
+  using type = CUcontext;
+};
+
+template <> struct BackendReturn<backend::ext_oneapi_cuda, context> {
+  using type = std::vector<CUcontext>;
+};
+
+template <> struct BackendInput<backend::ext_oneapi_cuda, device> {
+  using type = CUdevice;
+};
+
+template <> struct BackendReturn<backend::ext_oneapi_cuda, device> {
+  using type = CUdevice;
+};
+
+template <> struct BackendInput<backend::ext_oneapi_cuda, event> {
+  using type = CUevent;
+};
+
+template <> struct BackendReturn<backend::ext_oneapi_cuda, event> {
+  using type = CUevent;
+};
+
+template <> struct BackendInput<backend::ext_oneapi_cuda, queue> {
+  using type = CUstream;
+};
+
+template <> struct BackendReturn<backend::ext_oneapi_cuda, queue> {
+  using type = CUstream;
+};
+
+template <> struct BackendInput<backend::ext_oneapi_cuda, platform> {
+  using type = std::vector<CUdevice>;
+};
+
+template <> struct BackendReturn<backend::ext_oneapi_cuda, platform> {
+  using type = std::vector<CUdevice>;
+};
+
+#ifdef __SYCL_INTERNAL_API
+template <> struct BackendInput<backend::ext_oneapi_cuda, program> {
+  using type = CUmodule;
+};
+
+template <> struct BackendReturn<backend::ext_oneapi_cuda, program> {
+  using type = CUmodule;
+};
+#endif
+
+template <> struct InteropFeatureSupportMap<backend::ext_oneapi_cuda> {
+  static constexpr bool MakePlatform = false;
+  static constexpr bool MakeDevice = true;
+  static constexpr bool MakeContext = true;
+  static constexpr bool MakeQueue = false;
+  static constexpr bool MakeEvent = false;
+  static constexpr bool MakeBuffer = false;
+  static constexpr bool MakeKernel = false;
+  static constexpr bool MakeKernelBundle = false;
+};
+
+} // namespace detail
+} // namespace sycl
+} // __SYCL_INLINE_NAMESPACE(cl)
@@ -0,0 +1,75 @@
+//==--------- cuda.hpp - SYCL CUDA backend ---------------------------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <CL/sycl/backend.hpp>
+#include <CL/sycl/context.hpp>
+
+#include <vector>
+
+__SYCL_INLINE_NAMESPACE(cl) {
+namespace sycl {
+namespace ext {
+namespace oneapi {
+namespace cuda {
+
+// Implementation of ext_oneapi_cuda::make<device>
+inline __SYCL_EXPORT device make_device(pi_native_handle NativeHandle) {
+  return sycl::detail::make_device(NativeHandle, backend::ext_oneapi_cuda);
+}
+
+} // namespace cuda
+} // namespace oneapi
+} // namespace ext
+
+// CUDA context specialization
+template <>
+inline auto get_native<backend::ext_oneapi_cuda, context>(const context &C)
+    -> backend_return_t<backend::ext_oneapi_cuda, context> {
+  // create a vector to be returned
+  backend_return_t<backend::ext_oneapi_cuda, context> ret;
+
+  // get the native CUDA context from the SYCL object
+  auto native = reinterpret_cast<
+      backend_return_t<backend::ext_oneapi_cuda, context>::value_type>(
+      C.getNative());
+  ret.push_back(native);
+
+  return ret;
+}
+
+// Specialisation of non-free context get_native
+template <>
+inline backend_return_t<backend::ext_oneapi_cuda, context>
+context::get_native<backend::ext_oneapi_cuda>() const {
+  return sycl::get_native<backend::ext_oneapi_cuda, context>(*this);
+}
+
+// Specialisation of interop_handles get_native_context
+template <>
+inline backend_return_t<backend::ext_oneapi_cuda, context>
+interop_handle::get_native_context<backend::ext_oneapi_cuda>() const {
+#ifndef __SYCL_DEVICE_ONLY__
+  return std::vector{reinterpret_cast<CUcontext>(getNativeContext())};
+#else
+  // we believe this won't be ever called on device side
+  return {};
+#endif
+}
+
+// CUDA device specialization
+template <>
+inline device make_device<backend::ext_oneapi_cuda>(
+    const backend_input_t<backend::ext_oneapi_cuda, device> &BackendObject) {
+  pi_native_handle NativeHandle = static_cast<pi_native_handle>(BackendObject);
+  return ext::oneapi::cuda::make_device(NativeHandle);
+}
+
+} // namespace sycl
+} // __SYCL_INLINE_NAMESPACE(cl)
@@ -184,7 +184,15 @@ class ScopedContext {
       throw PI_INVALID_CONTEXT;
     }
 
-    CUcontext desired = ctxt->get();
+    set_context(ctxt->get());
+  }
+
+  ScopedContext(CUcontext ctxt) { set_context(ctxt); }
+
+  ~ScopedContext() {}
+
+private:
+  void set_context(CUcontext desired) {
     CUcontext original = nullptr;
 
     PI_CHECK_ERROR(cuCtxGetCurrent(&original));
@@ -195,8 +203,6 @@ class ScopedContext {
       PI_CHECK_ERROR(cuCtxSetCurrent(desired));
     }
   }
-
-  ~ScopedContext() {}
 };
 
 /// \cond NODOXY
@@ -1856,11 +1862,49 @@ pi_result cuda_piextDeviceGetNativeHandle(pi_device device,
 /// \param[out] device Set to the PI device object created from native handle.
 ///
 /// \return TBD
-pi_result cuda_piextDeviceCreateWithNativeHandle(pi_native_handle, pi_platform,
-                                                 pi_device *) {
-  cl::sycl::detail::pi::die(
-      "Creation of PI device from native handle not implemented");
-  return {};
+pi_result cuda_piextDeviceCreateWithNativeHandle(pi_native_handle nativeHandle,
+                                                 pi_platform platform,
+                                                 pi_device *piDevice) {
+  assert(piDevice != nullptr);
+
+  // If a platform is provided just check if the device is in it
+  if (platform) {
+    bool found_match = false;
+    for (auto &dev : platform->devices_) {
+      if (dev->get() == static_cast<CUdevice>(nativeHandle)) {
+        *piDevice = dev.get();
+        found_match = true;
+      }
+    }
+    if (!found_match)
+      return PI_INVALID_VALUE;
+    return PI_SUCCESS;
+  }
+
+  // Get list of platforms
+  pi_uint32 num_platforms;
+  pi_result result = cuda_piPlatformsGet(0, nullptr, &num_platforms);
+
+  pi_platform *plat =
+      static_cast<pi_platform *>(malloc(num_platforms * sizeof(pi_platform)));
+  result = cuda_piPlatformsGet(num_platforms, plat, nullptr);
+
+  // Iterate through platforms to find device that matches nativeHandle
+  bool found_match = false;
+  for (pi_uint32 j = 0; j < num_platforms; ++j) {
+    for (auto &dev : plat[j]->devices_) {
+      if (dev->get() == static_cast<CUdevice>(nativeHandle)) {
+        *piDevice = dev.get();
+        found_match = true;
+      }
+    }
+  }
+
+  // If the provided nativeHandle cannot be matched to an
+  // existing device return error
+  if (!found_match)
+    return PI_INVALID_VALUE;
+  return result;
 }
 
 /* Context APIs */
@@ -2025,12 +2069,36 @@ pi_result cuda_piextContextGetNativeHandle(pi_context context,
 /// \param[out] context Set to the PI context object created from native handle.
 ///
 /// \return TBD
-pi_result cuda_piextContextCreateWithNativeHandle(pi_native_handle, pi_uint32,
-                                                  const pi_device *, bool,
-                                                  pi_context *) {
-  cl::sycl::detail::pi::die(
-      "Creation of PI context from native handle not implemented");
-  return {};
+pi_result cuda_piextContextCreateWithNativeHandle(pi_native_handle nativeHandle,
+                                                  pi_uint32 num_devices,
+                                                  const pi_device *devices,
+                                                  bool ownNativeHandle,
+                                                  pi_context *piContext) {
+  (void)num_devices;
+  (void)devices;
+  (void)ownNativeHandle;
+  assert(piContext != nullptr);
+  assert(ownNativeHandle == false);
+
+  CUcontext newContext = reinterpret_cast<CUcontext>(nativeHandle);
+
+  ScopedContext active(newContext);
+
+  // Get context's native device
+  CUdevice cu_device;
+  pi_result retErr = PI_CHECK_ERROR(cuCtxGetDevice(&cu_device));
+
+  // Create a SYCL device from the ctx device
+  pi_device device = nullptr;
+  retErr = cuda_piextDeviceCreateWithNativeHandle(cu_device, nullptr, &device);
+
+  // Create sycl context
+  *piContext =
+      new _pi_context{_pi_context::kind::user_defined, newContext, device};
+
+  cuda_piContextRetain(*piContext);
+
+  return retErr;
 }
 
 /// Creates a PI Memory object using a CUDA memory allocation.

@@ -35,6 +35,8 @@ static const plugin &getPlugin(backend Backend) {
     return pi::getPlugin<backend::opencl>();
   case backend::ext_oneapi_level_zero:
     return pi::getPlugin<backend::ext_oneapi_level_zero>();
+  case backend::ext_oneapi_cuda:
+    return pi::getPlugin<backend::ext_oneapi_cuda>();
   default:
     throw sycl::runtime_error{"Unsupported backend", PI_INVALID_OPERATION};
   }

@@ -529,6 +529,7 @@ template __SYCL_EXPORT const plugin &
 getPlugin<backend::ext_oneapi_level_zero>();
 template __SYCL_EXPORT const plugin &
 getPlugin<backend::ext_intel_esimd_emulator>();
+template __SYCL_EXPORT const plugin &getPlugin<backend::ext_oneapi_cuda>();
 
 // Report error and no return (keeps compiler from printing warnings).
 // TODO: Probably change that to throw a catchable exception,