Skip to content

[SYCL] Use PI APIs for cooperative kernels #12367

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 35 commits into from
Feb 22, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
de14fcd
[SYCL] Use PI APIs for cooperative kernels
0x12CC Jan 11, 2024
aa90450
Run clang-format
0x12CC Jan 11, 2024
1f80192
Add missing PI symbols
0x12CC Jan 11, 2024
74c795b
Address review comments
0x12CC Jan 15, 2024
10f73d9
Merge branch 'sycl' into pi_cooperative_kernels
0x12CC Jan 15, 2024
b38f6e6
Update PI to include new query parameters
0x12CC Jan 18, 2024
203b825
Merge branch 'sycl' into pi_cooperative_kernels
0x12CC Jan 18, 2024
26fa27c
Merge branch 'sycl' into pi_cooperative_kernels
0x12CC Jan 19, 2024
5d68ba8
Merge branch 'sycl' into pi_cooperative_kernels
0x12CC Jan 19, 2024
3e0b6f3
Update UR commit
0x12CC Jan 19, 2024
522c824
Update UR tag
0x12CC Jan 24, 2024
f2b4dd9
Merge branch 'sycl' into pi_cooperative_kernels
0x12CC Jan 24, 2024
3a28dac
Update UR tag
0x12CC Jan 25, 2024
ac39404
Merge branch 'sycl' into pi_cooperative_kernels
0x12CC Jan 25, 2024
ec0a709
Merge branch 'sycl' into pi_cooperative_kernels
0x12CC Jan 26, 2024
3c2f1cd
Update PI minor version
0x12CC Feb 13, 2024
4959a75
Merge branch 'sycl' into pi_cooperative_kernels
0x12CC Feb 13, 2024
07075d7
Update UR commit
0x12CC Feb 13, 2024
417ad60
Move cooperative check to `isFusable`
0x12CC Feb 14, 2024
fb494d5
Merge branch 'sycl' into pi_cooperative_kernels
0x12CC Feb 14, 2024
95363c5
Add branch to root group barrier
0x12CC Feb 14, 2024
1b0e4ba
Use `static_cast` in `isFusable`
0x12CC Feb 14, 2024
f2dfb69
Update warning message and add test case for non-fusable cooperative …
0x12CC Feb 14, 2024
9d59da1
Disable `use_root_sync` on HIP
0x12CC Feb 15, 2024
559f4d3
Merge branch 'sycl' into pi_cooperative_kernels
0x12CC Feb 15, 2024
b6471e9
Re-enable `use_root_sync` on HIP
0x12CC Feb 15, 2024
a80234b
Move `isFusable` test case for cooperative kernels
0x12CC Feb 15, 2024
5d21e3e
Merge remote-tracking branch 'origin/sycl' into pi_cooperative_kernels
kbenzie Feb 19, 2024
0dbc792
[UR] Bump tag to 3fd11f1d
kbenzie Feb 19, 2024
dabedb4
Merge remote-tracking branch 'origin/sycl' into pi_cooperative_kernels
kbenzie Feb 20, 2024
6136994
Remove `else` after `return`
0x12CC Feb 20, 2024
a34f065
Update sycl/include/sycl/detail/pi.h
0x12CC Feb 21, 2024
fff4a88
Update barrier comment
0x12CC Feb 21, 2024
dfe9281
Merge branch 'sycl' into pi_cooperative_kernels
0x12CC Feb 21, 2024
275d2bd
Merge branch 'sycl' into pi_cooperative_kernels
steffenlarsen Feb 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions sycl/include/sycl/detail/cg.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ class CGExecKernel : public CG {
std::vector<std::shared_ptr<detail::stream_impl>> MStreams;
std::vector<std::shared_ptr<const void>> MAuxiliaryResources;
sycl::detail::pi::PiKernelCacheConfig MKernelCacheConfig;
bool MKernelIsCooperative = false;

CGExecKernel(NDRDescT NDRDesc, std::shared_ptr<HostKernelBase> HKernel,
std::shared_ptr<detail::kernel_impl> SyclKernel,
Expand All @@ -182,14 +183,15 @@ class CGExecKernel : public CG {
std::vector<std::shared_ptr<const void>> AuxiliaryResources,
CGTYPE Type,
sycl::detail::pi::PiKernelCacheConfig KernelCacheConfig,
detail::code_location loc = {})
bool KernelIsCooperative, detail::code_location loc = {})
: CG(Type, std::move(CGData), std::move(loc)),
MNDRDesc(std::move(NDRDesc)), MHostKernel(std::move(HKernel)),
MSyclKernel(std::move(SyclKernel)),
MKernelBundle(std::move(KernelBundle)), MArgs(std::move(Args)),
MKernelName(std::move(KernelName)), MStreams(std::move(Streams)),
MAuxiliaryResources(std::move(AuxiliaryResources)),
MKernelCacheConfig(std::move(KernelCacheConfig)) {
MKernelCacheConfig(std::move(KernelCacheConfig)),
MKernelIsCooperative(KernelIsCooperative) {
assert(getType() == Kernel && "Wrong type of exec kernel CG.");
}

Expand Down
2 changes: 2 additions & 0 deletions sycl/include/sycl/detail/pi.def
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ _PI_API(piextKernelSetArgPointer)
_PI_API(piKernelSetExecInfo)
_PI_API(piextKernelCreateWithNativeHandle)
_PI_API(piextKernelGetNativeHandle)
_PI_API(piextKernelSuggestMaxCooperativeGroupCount)
// Event
_PI_API(piEventCreate)
_PI_API(piEventGetInfo)
Expand All @@ -105,6 +106,7 @@ _PI_API(piSamplerRetain)
_PI_API(piSamplerRelease)
// Queue commands
_PI_API(piEnqueueKernelLaunch)
_PI_API(piextEnqueueCooperativeKernelLaunch)
_PI_API(piEnqueueEventsWait)
_PI_API(piEnqueueEventsWaitWithBarrier)
_PI_API(piEnqueueMemBufferRead)
Expand Down
14 changes: 14 additions & 0 deletions sycl/include/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -1659,6 +1659,14 @@ __SYCL_EXPORT pi_result piextKernelCreateWithNativeHandle(
__SYCL_EXPORT pi_result
piextKernelGetNativeHandle(pi_kernel kernel, pi_native_handle *nativeHandle);

/// Gets the max work group count for a cooperative kernel.
///
/// \param kernel is the PI kernel being queried.
/// \param group_count_ret is a pointer to where the query result will be
/// stored.
__SYCL_EXPORT pi_result piextKernelSuggestMaxCooperativeGroupCount(
pi_kernel kernel, pi_uint32 *group_count_ret);

//
// Events
//
Expand Down Expand Up @@ -1741,6 +1749,12 @@ __SYCL_EXPORT pi_result piEnqueueKernelLaunch(
const size_t *local_work_size, pi_uint32 num_events_in_wait_list,
const pi_event *event_wait_list, pi_event *event);

__SYCL_EXPORT pi_result piextEnqueueCooperativeKernelLaunch(
pi_queue queue, pi_kernel kernel, pi_uint32 work_dim,
const size_t *global_work_offset, const size_t *global_work_size,
const size_t *local_work_size, pi_uint32 num_events_in_wait_list,
const pi_event *event_wait_list, pi_event *event);

__SYCL_EXPORT pi_result piEnqueueEventsWait(pi_queue command_queue,
pi_uint32 num_events_in_wait_list,
const pi_event *event_wait_list,
Expand Down
6 changes: 6 additions & 0 deletions sycl/include/sycl/detail/spirv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ struct sub_group;
namespace experimental {
template <typename ParentGroup> class ballot_group;
template <size_t PartitionSize, typename ParentGroup> class fixed_size_group;
template <int Dimensions> class root_group;
template <typename ParentGroup> class tangle_group;
class opportunistic_group;
} // namespace experimental
Expand Down Expand Up @@ -51,6 +52,11 @@ namespace spirv {

template <typename Group> struct group_scope {};

template <int Dimensions>
struct group_scope<sycl::ext::oneapi::experimental::root_group<Dimensions>> {
static constexpr __spv::Scope::Flag value = __spv::Scope::Flag::Device;
};

template <int Dimensions> struct group_scope<group<Dimensions>> {
static constexpr __spv::Scope::Flag value = __spv::Scope::Flag::Workgroup;
};
Expand Down
6 changes: 3 additions & 3 deletions sycl/include/sycl/detail/type_traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@ template <class T>
inline constexpr bool is_fixed_topology_group_v =
is_fixed_topology_group<T>::value;

#ifdef SYCL_EXT_ONEAPI_ROOT_GROUP
template <> struct is_fixed_topology_group<root_group> : std::true_type {};
#endif
template <int Dimensions> class root_group;
template <int Dimensions>
struct is_fixed_topology_group<root_group<Dimensions>> : std::true_type {};

template <int Dimensions>
struct is_fixed_topology_group<sycl::group<Dimensions>> : std::true_type {};
Expand Down
33 changes: 10 additions & 23 deletions sycl/include/sycl/ext/oneapi/experimental/root_group.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@

#include <sycl/builtins.hpp>
#include <sycl/ext/oneapi/properties/properties.hpp>
#include <sycl/group.hpp>
#include <sycl/memory_enums.hpp>
#include <sycl/queue.hpp>

#define SYCL_EXT_ONEAPI_ROOT_GROUP 1
#include <sycl/nd_item.hpp>
#include <sycl/sub_group.hpp>

namespace sycl {
inline namespace _V1 {
Expand Down Expand Up @@ -106,31 +106,18 @@ template <int Dimensions> root_group<Dimensions> get_root_group() {

} // namespace ext::oneapi::experimental

template <>
typename ext::oneapi::experimental::info::kernel_queue_specific::
max_num_work_group_sync::return_type
kernel::ext_oneapi_get_info<
ext::oneapi::experimental::info::kernel_queue_specific::
max_num_work_group_sync>(const queue &q) const {
// TODO: query the backend to return a value >= 1.
return 1;
}

template <int dimensions>
void group_barrier(ext::oneapi::experimental::root_group<dimensions> G,
memory_scope FenceScope = decltype(G)::fence_scope) {
(void)G;
(void)FenceScope;
#ifdef __SYCL_DEVICE_ONLY__
// TODO: Change __spv::Scope::Workgroup to __spv::Scope::Device once backends
// support device scope. __spv::Scope::Workgroup is only valid when
// max_num_work_group_sync is 1, so that all work items in a root group will
// also be in the same work group.
__spirv_ControlBarrier(__spv::Scope::Workgroup, __spv::Scope::Workgroup,
__spv::MemorySemanticsMask::SubgroupMemory |
__spv::MemorySemanticsMask::WorkgroupMemory |
__spv::MemorySemanticsMask::CrossWorkgroupMemory);
// Root group barrier first synchronizes using a work group barrier. This
// allows backends to ignore the second ControlBarrier (with Device scope) if
// their maximum number of work groups is 1.
group_barrier(get_child_group(G));
detail::spirv::ControlBarrier(G, FenceScope, memory_order::seq_cst);
#else
(void)G;
(void)FenceScope;
throw sycl::runtime_error("Barriers are not supported on host device",
PI_ERROR_INVALID_DEVICE);
#endif
Expand Down
7 changes: 7 additions & 0 deletions sycl/include/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include <sycl/ext/oneapi/device_global/device_global.hpp>
#include <sycl/ext/oneapi/device_global/properties.hpp>
#include <sycl/ext/oneapi/experimental/graph.hpp>
#include <sycl/ext/oneapi/experimental/root_group.hpp>
#include <sycl/ext/oneapi/kernel_properties/properties.hpp>
#include <sycl/ext/oneapi/properties/properties.hpp>
#include <sycl/group.hpp>
Expand Down Expand Up @@ -939,6 +940,10 @@ class __SYCL_EXPORT handler {
} else {
std::ignore = Props;
}

constexpr bool UsesRootSync = PropertiesT::template has_property<
sycl::ext::oneapi::experimental::use_root_sync_key>();
setKernelIsCooperative(UsesRootSync);
}

/// Checks whether it is possible to copy the source shape to the destination
Expand Down Expand Up @@ -3618,6 +3623,8 @@ class __SYCL_EXPORT handler {

// Set value of the gpu cache configuration for the kernel.
void setKernelCacheConfig(sycl::detail::pi::PiKernelCacheConfig);
// Set value of the kernel is cooperative flag
void setKernelIsCooperative(bool);

template <
ext::oneapi::experimental::detail::UnsupportedGraphFeatures FeatureT>
Expand Down
1 change: 1 addition & 0 deletions sycl/include/sycl/sycl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@
#include <sycl/ext/oneapi/experimental/fixed_size_group.hpp>
#include <sycl/ext/oneapi/experimental/opportunistic_group.hpp>
#include <sycl/ext/oneapi/experimental/prefetch.hpp>
#include <sycl/ext/oneapi/experimental/root_group.hpp>
#include <sycl/ext/oneapi/experimental/tangle_group.hpp>
#include <sycl/ext/oneapi/filter_selector.hpp>
#include <sycl/ext/oneapi/functional.hpp>
Expand Down
16 changes: 16 additions & 0 deletions sycl/plugins/cuda/pi_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,16 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
NumEventsInWaitList, EventWaitList, OutEvent);
}

pi_result piextEnqueueCooperativeKernelLaunch(
pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize,
const size_t *LocalWorkSize, pi_uint32 NumEventsInWaitList,
const pi_event *EventWaitList, pi_event *OutEvent) {
return pi2ur::piextEnqueueCooperativeKernelLaunch(
Queue, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, LocalWorkSize,
NumEventsInWaitList, EventWaitList, OutEvent);
}

pi_result piextKernelCreateWithNativeHandle(pi_native_handle NativeHandle,
pi_context Context,
pi_program Program,
Expand All @@ -553,6 +563,12 @@ pi_result piextKernelGetNativeHandle(pi_kernel Kernel,
return pi2ur::piextKernelGetNativeHandle(Kernel, NativeHandle);
}

pi_result piextKernelSuggestMaxCooperativeGroupCount(pi_kernel Kernel,
pi_uint32 *GroupCountRet) {
return pi2ur::piextKernelSuggestMaxCooperativeGroupCount(Kernel,
GroupCountRet);
}

pi_result piEventCreate(pi_context Context, pi_event *RetEvent) {
return pi2ur::piEventCreate(Context, RetEvent);
}
Expand Down
16 changes: 16 additions & 0 deletions sycl/plugins/hip/pi_hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,16 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
NumEventsInWaitList, EventWaitList, OutEvent);
}

pi_result piextEnqueueCooperativeKernelLaunch(
pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize,
const size_t *LocalWorkSize, pi_uint32 NumEventsInWaitList,
const pi_event *EventWaitList, pi_event *OutEvent) {
return pi2ur::piextEnqueueCooperativeKernelLaunch(
Queue, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, LocalWorkSize,
NumEventsInWaitList, EventWaitList, OutEvent);
}

pi_result piextKernelCreateWithNativeHandle(pi_native_handle NativeHandle,
pi_context Context,
pi_program Program,
Expand All @@ -561,6 +571,12 @@ pi_result piextKernelGetNativeHandle(pi_kernel Kernel,
return pi2ur::piextKernelGetNativeHandle(Kernel, NativeHandle);
}

pi_result piextKernelSuggestMaxCooperativeGroupCount(pi_kernel Kernel,
pi_uint32 *GroupCountRet) {
return pi2ur::piextKernelSuggestMaxCooperativeGroupCount(Kernel,
GroupCountRet);
}

pi_result piEventCreate(pi_context Context, pi_event *RetEvent) {
return pi2ur::piEventCreate(Context, RetEvent);
}
Expand Down
16 changes: 16 additions & 0 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,16 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
NumEventsInWaitList, EventWaitList, OutEvent);
}

pi_result piextEnqueueCooperativeKernelLaunch(
pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize,
const size_t *LocalWorkSize, pi_uint32 NumEventsInWaitList,
const pi_event *EventWaitList, pi_event *OutEvent) {
return pi2ur::piextEnqueueCooperativeKernelLaunch(
Queue, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, LocalWorkSize,
NumEventsInWaitList, EventWaitList, OutEvent);
}

pi_result piextKernelCreateWithNativeHandle(pi_native_handle NativeHandle,
pi_context Context,
pi_program Program,
Expand All @@ -572,6 +582,12 @@ pi_result piextKernelGetNativeHandle(pi_kernel Kernel,
return pi2ur::piextKernelGetNativeHandle(Kernel, NativeHandle);
}

pi_result piextKernelSuggestMaxCooperativeGroupCount(pi_kernel Kernel,
pi_uint32 *GroupCountRet) {
return pi2ur::piextKernelSuggestMaxCooperativeGroupCount(Kernel,
GroupCountRet);
}

//
// Events
//
Expand Down
16 changes: 16 additions & 0 deletions sycl/plugins/opencl/pi_opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,16 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
NumEventsInWaitList, EventWaitList, OutEvent);
}

pi_result piextEnqueueCooperativeKernelLaunch(
pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize,
const size_t *LocalWorkSize, pi_uint32 NumEventsInWaitList,
const pi_event *EventWaitList, pi_event *OutEvent) {
return pi2ur::piextEnqueueCooperativeKernelLaunch(
Queue, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, LocalWorkSize,
NumEventsInWaitList, EventWaitList, OutEvent);
}

pi_result piextKernelCreateWithNativeHandle(pi_native_handle NativeHandle,
pi_context Context,
pi_program Program,
Expand All @@ -540,6 +550,12 @@ pi_result piextKernelGetNativeHandle(pi_kernel Kernel,
return pi2ur::piextKernelGetNativeHandle(Kernel, NativeHandle);
}

pi_result piextKernelSuggestMaxCooperativeGroupCount(pi_kernel Kernel,
pi_uint32 *GroupCountRet) {
return pi2ur::piextKernelSuggestMaxCooperativeGroupCount(Kernel,
GroupCountRet);
}

pi_result piEventCreate(pi_context Context, pi_event *RetEvent) {
return pi2ur::piEventCreate(Context, RetEvent);
}
Expand Down
4 changes: 2 additions & 2 deletions sycl/plugins/unified_runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,14 @@ endif()
if(SYCL_PI_UR_USE_FETCH_CONTENT)
include(FetchContent)

set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
set(UNIFIED_RUNTIME_REPO "https://github.com/0x12CC/unified-runtime.git")
# commit c53953ae492587698d5adbab8ffee254d97b6a4e
# Merge: 9f88cf88 66d52ace
# Author: Kenneth Benzie (Benie) <k.benzie@codeplay.com>
# Date: Wed Jan 10 14:50:23 2024 +0000
# Merge pull request #1170 from jchlanda/jakub/hip_custom_dirs
# [HIP] Allow custom location of ROCm components
set(UNIFIED_RUNTIME_TAG c53953ae492587698d5adbab8ffee254d97b6a4e)
set(UNIFIED_RUNTIME_TAG 48a9ef11fc4c14a1119b8410d6f4618e7c696cf3)

if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO)
set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}")
Expand Down
37 changes: 37 additions & 0 deletions sycl/plugins/unified_runtime/pi2ur.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2581,6 +2581,19 @@ inline pi_result piextKernelGetNativeHandle(pi_kernel Kernel,
return PI_SUCCESS;
}

inline pi_result
piextKernelSuggestMaxCooperativeGroupCount(pi_kernel Kernel,
pi_uint32 *GroupCountRet) {
PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL);
PI_ASSERT(GroupCountRet, PI_ERROR_INVALID_VALUE);

ur_kernel_handle_t UrKernel = reinterpret_cast<ur_kernel_handle_t>(Kernel);
HANDLE_ERRORS(
urKernelSuggestMaxCooperativeGroupCountExp(UrKernel, GroupCountRet));

return PI_SUCCESS;
}

/// API for writing data from host to a device global variable.
///
/// \param Queue is the queue
Expand Down Expand Up @@ -3610,6 +3623,30 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
return PI_SUCCESS;
}

inline pi_result piextEnqueueCooperativeKernelLaunch(
pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize,
const size_t *LocalWorkSize, pi_uint32 NumEventsInWaitList,
const pi_event *EventsWaitList, pi_event *OutEvent) {

PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL);
PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE);
PI_ASSERT((WorkDim > 0) && (WorkDim < 4), PI_ERROR_INVALID_WORK_DIMENSION);

ur_queue_handle_t UrQueue = reinterpret_cast<ur_queue_handle_t>(Queue);
ur_kernel_handle_t UrKernel = reinterpret_cast<ur_kernel_handle_t>(Kernel);
const ur_event_handle_t *UrEventsWaitList =
reinterpret_cast<const ur_event_handle_t *>(EventsWaitList);

ur_event_handle_t *UREvent = reinterpret_cast<ur_event_handle_t *>(OutEvent);

HANDLE_ERRORS(urEnqueueCooperativeKernelLaunchExp(
UrQueue, UrKernel, WorkDim, GlobalWorkOffset, GlobalWorkSize,
LocalWorkSize, NumEventsInWaitList, UrEventsWaitList, UREvent));

return PI_SUCCESS;
}

inline pi_result
piEnqueueMemImageWrite(pi_queue Queue, pi_mem Image, pi_bool BlockingWrite,
pi_image_offset Origin, pi_image_region Region,
Expand Down
2 changes: 1 addition & 1 deletion sycl/source/detail/graph_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -880,7 +880,7 @@ exec_graph_impl::enqueue(const std::shared_ptr<sycl::detail::queue_impl> &Queue,
// TODO: Pass accessor mem allocations
nullptr,
// TODO: Extract from handler
PI_EXT_KERNEL_EXEC_INFO_CACHE_DEFAULT);
PI_EXT_KERNEL_EXEC_INFO_CACHE_DEFAULT, CG->MKernelIsCooperative);
if (Res != pi_result::PI_SUCCESS) {
throw sycl::exception(
sycl::make_error_code(sycl::errc::kernel),
Expand Down
2 changes: 2 additions & 0 deletions sycl/source/detail/handler_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ class handler_impl {
sycl::detail::pi::PiKernelCacheConfig MKernelCacheConfig =
PI_EXT_KERNEL_EXEC_INFO_CACHE_DEFAULT;

bool MKernelIsCooperative = false;

// Extra information for bindless image copy
sycl::detail::pi::PiMemImageDesc MImageDesc;
sycl::detail::pi::PiMemImageFormat MImageFormat;
Expand Down
Loading