Skip to content
Closed
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions sycl/plugins/cuda/pi_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,7 @@ pi_result cuda_piPlatformGetInfo(pi_platform platform,
switch (param_name) {
case PI_PLATFORM_INFO_NAME:
return getInfo(param_value_size, param_value, param_value_size_ret,
"NVIDIA CUDA");
"NVIDIA CUDA BACKEND");
case PI_PLATFORM_INFO_VENDOR:
return getInfo(param_value_size, param_value, param_value_size_ret,
"NVIDIA Corporation");
Expand Down Expand Up @@ -3359,6 +3359,13 @@ pi_result cuda_piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer,
ret_err = cuda_piEnqueueMemBufferRead(
command_queue, buffer, blocking_map, offset, size, hostPtr,
num_events_in_wait_list, event_wait_list, retEvent);
} else {
if (retEvent) {
auto new_event =
_pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_MAP, command_queue);
new_event->record();
*retEvent = new_event;
}
}

return ret_err;
Expand All @@ -3372,7 +3379,7 @@ pi_result cuda_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj,
pi_uint32 num_events_in_wait_list,
const pi_event *event_wait_list,
pi_event *retEvent) {
pi_result ret_err = PI_INVALID_OPERATION;
pi_result ret_err = PI_SUCCESS;

assert(mapped_ptr != nullptr);
assert(memobj != nullptr);
Expand All @@ -3385,6 +3392,13 @@ pi_result cuda_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj,
command_queue, memobj, true, memobj->get_map_offset(mapped_ptr),
memobj->get_size(), mapped_ptr, num_events_in_wait_list, event_wait_list,
retEvent);
} else {
if (retEvent) {
auto new_event = _pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_UNMAP,
command_queue);
new_event->record();
*retEvent = new_event;
}
}

memobj->unmap(mapped_ptr);
Expand Down
2 changes: 1 addition & 1 deletion sycl/source/detail/platform_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class platform_impl {
bool is_host() const { return MHostPlatform; };

bool is_cuda() const {
const string_class CUDA_PLATFORM_STRING = "NVIDIA CUDA";
const string_class CUDA_PLATFORM_STRING = "NVIDIA CUDA BACKEND";
const string_class PlatformName =
get_platform_info<string_class, info::platform::name>::get(MPlatform,
getPlugin());
Expand Down
11 changes: 11 additions & 0 deletions sycl/source/detail/plugin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,17 @@ class plugin {
RT::PiPlugin MPlugin;
const backend MBackend;
}; // class plugin

/// Two plugins are considered equal if their PluginVersion string is the
/// same. There is no need to compare the string contents — comparing the
/// pointer suffices, since there is only one instance of the PiPlugin
/// struct per backend.
///
/// \ingroup sycl_pi
///
inline bool operator==(const plugin &lhs, const plugin &rhs) {
return (lhs.getPiPlugin().PluginVersion == rhs.getPiPlugin().PluginVersion);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this just check the MBackend (getBackend) of the plugins are the same.

}

} // namespace detail
} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)
24 changes: 1 addition & 23 deletions sycl/source/detail/program_manager/program_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,29 +85,7 @@ static RT::PiProgram createBinaryProgram(const ContextImplPtr Context,

RT::PiProgram Program;

bool IsCUDA = false;

// TODO: Implement `piProgramCreateWithBinary` to not require extra logic for
// the CUDA backend.
#if USE_PI_CUDA
// All devices in a context are from the same platform.
RT::PiDevice Device = getFirstDevice(Context);
RT::PiPlatform Platform = nullptr;
Plugin.call<PiApiKind::piDeviceGetInfo>(Device, PI_DEVICE_INFO_PLATFORM, sizeof(Platform),
&Platform, nullptr);
size_t PlatformNameSize = 0u;
Plugin.call<PiApiKind::piPlatformGetInfo>(Platform, PI_PLATFORM_INFO_NAME, 0u, nullptr,
&PlatformNameSize);
std::vector<char> PlatformName(PlatformNameSize, '\0');
Plugin.call<PiApiKind::piPlatformGetInfo>(Platform, PI_PLATFORM_INFO_NAME,
PlatformName.size(), PlatformName.data(), nullptr);
if (PlatformNameSize > 0u &&
std::strncmp(PlatformName.data(), "NVIDIA CUDA", PlatformNameSize) == 0) {
IsCUDA = true;
}
#endif // USE_PI_CUDA

if (IsCUDA) {
if (Context->getPlatformImpl()->is_cuda()) {
      // TODO: Replace CreateWithSource with CreateWithBinary in the CUDA backend
const char *SignedData = reinterpret_cast<const char *>(Data);
Plugin.call<PiApiKind::piclProgramCreateWithSource>(Context->getHandleRef(), 1 /*one binary*/, &SignedData,
Expand Down
10 changes: 7 additions & 3 deletions sycl/source/detail/queue_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,16 @@ class queue_impl {
: MDevice(Device), MContext(Context), MAsyncHandler(AsyncHandler),
MPropList(PropList), MHostQueue(MDevice->is_host()),
MOpenCLInterop(!MHostQueue) {
if (!MHostQueue) {
MCommandQueue = createQueue(Order);
}

if (!Context->hasDevice(Device))
throw cl::sycl::invalid_parameter_error(
"Queue cannot be constructed with the given context and device "
"as the context does not contain the given device.",
PI_INVALID_DEVICE);

if (!MHostQueue) {
MCommandQueue = createQueue(Order);
}
}

/// Constructs a SYCL queue from plugin interoperability handle.
Expand Down Expand Up @@ -240,6 +242,8 @@ class queue_impl {
RT::PiContext Context = MContext->getHandleRef();
RT::PiDevice Device = MDevice->getHandleRef();
const detail::plugin &Plugin = getPlugin();

assert(Plugin == MDevice->getPlugin());
RT::PiResult Error = Plugin.call_nocheck<PiApiKind::piQueueCreate>(
Context, Device, CreationFlags, &Queue);

Expand Down
3 changes: 0 additions & 3 deletions sycl/test/scheduler/DataMovement.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
// XFAIL: cuda
// TODO: Fix accidental error return when unmapping read-only memory objects.
//
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -I %sycl_source_dir %s -o %t.out -g
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
Expand Down