Skip to content
Closed
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions sycl/plugins/cuda/pi_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,7 @@ pi_result cuda_piPlatformGetInfo(pi_platform platform,
switch (param_name) {
case PI_PLATFORM_INFO_NAME:
return getInfo(param_value_size, param_value, param_value_size_ret,
"NVIDIA CUDA");
"NVIDIA CUDA BACKEND");
case PI_PLATFORM_INFO_VENDOR:
return getInfo(param_value_size, param_value, param_value_size_ret,
"NVIDIA Corporation");
Expand Down Expand Up @@ -3359,6 +3359,13 @@ pi_result cuda_piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer,
ret_err = cuda_piEnqueueMemBufferRead(
command_queue, buffer, blocking_map, offset, size, hostPtr,
num_events_in_wait_list, event_wait_list, retEvent);
} else {
if (retEvent) {
auto new_event =
_pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_MAP, command_queue);
new_event->record();
*retEvent = new_event;
}
}

return ret_err;
Expand All @@ -3372,7 +3379,7 @@ pi_result cuda_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj,
pi_uint32 num_events_in_wait_list,
const pi_event *event_wait_list,
pi_event *retEvent) {
pi_result ret_err = PI_INVALID_OPERATION;
pi_result ret_err = PI_SUCCESS;

assert(mapped_ptr != nullptr);
assert(memobj != nullptr);
Expand All @@ -3385,6 +3392,13 @@ pi_result cuda_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj,
command_queue, memobj, true, memobj->get_map_offset(mapped_ptr),
memobj->get_size(), mapped_ptr, num_events_in_wait_list, event_wait_list,
retEvent);
} else {
if (retEvent) {
auto new_event = _pi_event::make_native(PI_COMMAND_TYPE_MEM_BUFFER_UNMAP,
command_queue);
new_event->record();
*retEvent = new_event;
}
}

memobj->unmap(mapped_ptr);
Expand Down
2 changes: 1 addition & 1 deletion sycl/source/detail/platform_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class platform_impl {
bool is_host() const { return MHostPlatform; };

bool is_cuda() const {
const string_class CUDA_PLATFORM_STRING = "NVIDIA CUDA";
const string_class CUDA_PLATFORM_STRING = "NVIDIA CUDA BACKEND";
const string_class PlatformName =
get_platform_info<string_class, info::platform::name>::get(MPlatform,
getPlugin());
Expand Down
11 changes: 11 additions & 0 deletions sycl/source/detail/plugin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,17 @@ class plugin {
RT::PiPlugin MPlugin;
const backend MBackend;
}; // class plugin

/// Two plugins are considered equal if their PluginVersion string is the
/// same. There is no need to compare the string contents — comparing the
/// pointer suffices, since there is only one instance of the PiPlugin
/// struct per backend.
///
/// \ingroup sycl_pi
///
inline bool operator==(const plugin &lhs, const plugin &rhs) {
return (lhs.getPiPlugin().PluginVersion == rhs.getPiPlugin().PluginVersion);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this just check the MBackend (getBackend) of the plugins are the same.

}

} // namespace detail
} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)
24 changes: 1 addition & 23 deletions sycl/source/detail/program_manager/program_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,29 +85,7 @@ static RT::PiProgram createBinaryProgram(const ContextImplPtr Context,

RT::PiProgram Program;

bool IsCUDA = false;

// TODO: Implement `piProgramCreateWithBinary` to not require extra logic for
// the CUDA backend.
#if USE_PI_CUDA
// All devices in a context are from the same platform.
RT::PiDevice Device = getFirstDevice(Context);
RT::PiPlatform Platform = nullptr;
Plugin.call<PiApiKind::piDeviceGetInfo>(Device, PI_DEVICE_INFO_PLATFORM, sizeof(Platform),
&Platform, nullptr);
size_t PlatformNameSize = 0u;
Plugin.call<PiApiKind::piPlatformGetInfo>(Platform, PI_PLATFORM_INFO_NAME, 0u, nullptr,
&PlatformNameSize);
std::vector<char> PlatformName(PlatformNameSize, '\0');
Plugin.call<PiApiKind::piPlatformGetInfo>(Platform, PI_PLATFORM_INFO_NAME,
PlatformName.size(), PlatformName.data(), nullptr);
if (PlatformNameSize > 0u &&
std::strncmp(PlatformName.data(), "NVIDIA CUDA", PlatformNameSize) == 0) {
IsCUDA = true;
}
#endif // USE_PI_CUDA

if (IsCUDA) {
if (Context->getPlatformImpl()->is_cuda()) {
      // TODO: Replace CreateWithSource with CreateWithBinary in the CUDA backend
const char *SignedData = reinterpret_cast<const char *>(Data);
Plugin.call<PiApiKind::piclProgramCreateWithSource>(Context->getHandleRef(), 1 /*one binary*/, &SignedData,
Expand Down
10 changes: 7 additions & 3 deletions sycl/source/detail/queue_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,16 @@ class queue_impl {
: MDevice(Device), MContext(Context), MAsyncHandler(AsyncHandler),
MPropList(PropList), MHostQueue(MDevice->is_host()),
MOpenCLInterop(!MHostQueue) {
if (!MHostQueue) {
MCommandQueue = createQueue(Order);
}

if (!Context->hasDevice(Device))
throw cl::sycl::invalid_parameter_error(
"Queue cannot be constructed with the given context and device "
"as the context does not contain the given device.",
PI_INVALID_DEVICE);

if (!MHostQueue) {
MCommandQueue = createQueue(Order);
}
}

/// Constructs a SYCL queue from plugin interoperability handle.
Expand Down Expand Up @@ -240,6 +242,8 @@ class queue_impl {
RT::PiContext Context = MContext->getHandleRef();
RT::PiDevice Device = MDevice->getHandleRef();
const detail::plugin &Plugin = getPlugin();

assert(Plugin == MDevice->getPlugin());
RT::PiResult Error = Plugin.call_nocheck<PiApiKind::piQueueCreate>(
Context, Device, CreationFlags, &Queue);

Expand Down
3 changes: 0 additions & 3 deletions sycl/test/scheduler/DataMovement.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
// XFAIL: cuda
// TODO: Fix accidental error return when unmapping read-only memory objects.
//
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -I %sycl_source_dir %s -o %t.out -g
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
Expand Down