Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

[SYCL] Update tests to use local_accessor #1063

Merged
merged 4 commits into from
Sep 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions SYCL/AtomicRef/add.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ void add_fetch_local_test(queue q, size_t N) {
auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
auto out =
output_buf.template get_access<access::mode::discard_write>(cgh);
accessor<T, 1, access::mode::read_write, access::target::local> loc(1,
cgh);
local_accessor<T, 1> loc(1, cgh);

cgh.parallel_for(nd_range<1>(N, N), [=](nd_item<1> it) {
int gid = it.get_global_id(0);
Expand Down
3 changes: 1 addition & 2 deletions SYCL/AtomicRef/and.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ void and_local_test(queue q) {
auto cum = cum_buf.template get_access<access::mode::read_write>(cgh);
auto out =
output_buf.template get_access<access::mode::discard_write>(cgh);
accessor<T, 1, access::mode::read_write, access::target::local> loc(1,
cgh);
local_accessor<T, 1> loc(1, cgh);

cgh.parallel_for(nd_range<1>(N, N), [=](nd_item<1> it) {
int gid = it.get_global_id(0);
Expand Down
6 changes: 2 additions & 4 deletions SYCL/AtomicRef/atomic_memory_order_acq_rel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,7 @@ template <memory_order order> void test_acquire_local() {
q.submit([&](handler &cgh) {
auto error =
error_buf.template get_access<access::mode::read_write>(cgh);
accessor<int, 1, access::mode::read_write, access::target::local> val(
2, cgh);
local_accessor<int, 1> val(2, cgh);
cgh.parallel_for(
nd_range<1>(global_size, local_size), [=](nd_item<1> it) {
size_t lid = it.get_local_id(0);
Expand Down Expand Up @@ -168,8 +167,7 @@ template <memory_order order> void test_release_local() {
q.submit([&](handler &cgh) {
auto error =
error_buf.template get_access<access::mode::read_write>(cgh);
accessor<int, 1, access::mode::read_write, access::target::local> val(
2, cgh);
local_accessor<int, 1> val(2, cgh);
cgh.parallel_for(
nd_range<1>(global_size, local_size), [=](nd_item<1> it) {
size_t lid = it.get_local_id(0);
Expand Down
3 changes: 1 addition & 2 deletions SYCL/AtomicRef/atomic_memory_order_seq_cst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,7 @@ template <memory_order order> void test_local() {

q.submit([&](handler &cgh) {
auto res = res_buf.template get_access<access::mode::discard_write>(cgh);
accessor<int, 1, access::mode::read_write, access::target::local> val(2,
cgh);
local_accessor<int, 1> val(2, cgh);
cgh.parallel_for(nd_range<1>(N_items, N_items), [=](nd_item<1> it) {
val[0] = 0;
it.barrier(access::fence_space::local_space);
Expand Down
3 changes: 1 addition & 2 deletions SYCL/AtomicRef/compare_exchange.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,7 @@ void compare_exchange_local_test(queue q, size_t N) {
cgh);
auto out =
output_buf.template get_access<access::mode::discard_write>(cgh);
accessor<T, 1, access::mode::read_write, access::target::local> loc(1,
cgh);
local_accessor<T, 1> loc(1, cgh);

cgh.parallel_for(nd_range<1>(N, N), [=](nd_item<1> it) {
int gid = it.get_global_id(0);
Expand Down
3 changes: 1 addition & 2 deletions SYCL/AtomicRef/exchange.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ void exchange_local_test(queue q, size_t N) {
auto cum = cum_buf.template get_access<access::mode::read_write>(cgh);
auto out =
output_buf.template get_access<access::mode::discard_write>(cgh);
accessor<T, 1, access::mode::read_write, access::target::local> loc(1,
cgh);
local_accessor<T, 1> loc(1, cgh);

cgh.parallel_for(nd_range<1>(N, N), [=](nd_item<1> it) {
int gid = it.get_global_id(0);
Expand Down
3 changes: 1 addition & 2 deletions SYCL/AtomicRef/load.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,7 @@ void load_local_test(queue q, size_t N) {
auto ld = load_buf.template get_access<access::mode::read_write>(cgh);
auto out =
output_buf.template get_access<access::mode::discard_write>(cgh);
accessor<T, 1, access::mode::read_write, access::target::local> loc(1,
cgh);
local_accessor<T, 1> loc(1, cgh);
cgh.parallel_for(nd_range<1>(N, N), [=](nd_item<1> it) {
int gid = it.get_global_id(0);
if (gid == 0)
Expand Down
3 changes: 1 addition & 2 deletions SYCL/AtomicRef/max.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ void max_local_test(queue q, size_t N) {
auto cum = cum_buf.template get_access<access::mode::read_write>(cgh);
auto out =
output_buf.template get_access<access::mode::discard_write>(cgh);
accessor<T, 1, access::mode::read_write, access::target::local> loc(1,
cgh);
local_accessor<T, 1> loc(1, cgh);

cgh.parallel_for(nd_range<1>(N, N), [=](nd_item<1> it) {
int gid = it.get_global_id(0);
Expand Down
3 changes: 1 addition & 2 deletions SYCL/AtomicRef/min.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ void min_local_test(queue q, size_t N) {
auto cum = cum_buf.template get_access<access::mode::read_write>(cgh);
auto out =
output_buf.template get_access<access::mode::discard_write>(cgh);
accessor<T, 1, access::mode::read_write, access::target::local> loc(1,
cgh);
local_accessor<T, 1> loc(1, cgh);

cgh.parallel_for(nd_range<1>(N, N), [=](nd_item<1> it) {
int gid = it.get_global_id(0);
Expand Down
3 changes: 1 addition & 2 deletions SYCL/AtomicRef/or.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ void or_local_test(queue q) {
auto cum = cum_buf.template get_access<access::mode::read_write>(cgh);
auto out =
output_buf.template get_access<access::mode::discard_write>(cgh);
accessor<T, 1, access::mode::read_write, access::target::local> loc(1,
cgh);
local_accessor<T, 1> loc(1, cgh);

cgh.parallel_for(nd_range<1>(N, N), [=](nd_item<1> it) {
int gid = it.get_global_id(0);
Expand Down
3 changes: 1 addition & 2 deletions SYCL/AtomicRef/store.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,7 @@ void store_local_test(queue q, size_t N) {
buffer<T> store_buf(&store, 1);
q.submit([&](handler &cgh) {
auto st = store_buf.template get_access<access::mode::read_write>(cgh);
accessor<T, 1, access::mode::read_write, access::target::local> loc(1,
cgh);
local_accessor<T, 1> loc(1, cgh);
cgh.parallel_for(nd_range<1>(N, N), [=](nd_item<1> it) {
size_t gid = it.get_global_id(0);
auto atm = AtomicRef<T, memory_order::relaxed, scope, space>(loc[0]);
Expand Down
3 changes: 1 addition & 2 deletions SYCL/AtomicRef/sub.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ void sub_fetch_local_test(queue q, size_t N) {
auto sum = sum_buf.template get_access<access::mode::read_write>(cgh);
auto out =
output_buf.template get_access<access::mode::discard_write>(cgh);
accessor<T, 1, access::mode::read_write, access::target::local> loc(1,
cgh);
local_accessor<T, 1> loc(1, cgh);

cgh.parallel_for(nd_range<1>(N, N), [=](nd_item<1> it) {
int gid = it.get_global_id(0);
Expand Down
3 changes: 1 addition & 2 deletions SYCL/AtomicRef/xor.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ void xor_local_test(queue q) {
auto cum = cum_buf.template get_access<access::mode::read_write>(cgh);
auto out =
output_buf.template get_access<access::mode::discard_write>(cgh);
accessor<T, 1, access::mode::read_write, access::target::local> loc(1,
cgh);
local_accessor<T, 1> loc(1, cgh);

cgh.parallel_for(nd_range<1>(N, N), [=](nd_item<1> it) {
int gid = it.get_global_id(0);
Expand Down
11 changes: 11 additions & 0 deletions SYCL/Basic/device_event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@
// TODO: nd_item::barrier() is not implemented on HOST
// RUNx: %HOST_RUN_PLACEHOLDER %t.run
//
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DUSE_DEPRECATED_LOCAL_ACC %s -o %t.run
// RUN: %GPU_RUN_PLACEHOLDER %t.run
// RUN: %CPU_RUN_PLACEHOLDER %t.run
// RUN: %ACC_RUN_PLACEHOLDER %t.run
// TODO: nd_item::barrier() is not implemented on HOST
// RUNx: %HOST_RUN_PLACEHOLDER %t.run
//
// Returns error "Barrier is not supported on the host device
// yet." with Nvidia.
// XFAIL: hip_nvidia
Expand Down Expand Up @@ -76,8 +83,12 @@ int test_strideN(size_t stride) {

myQueue.submit([&](handler &cgh) {
auto out_ptr = out_buf.get_access<access::mode::write>(cgh);
// Selects how the 16-element local-memory accessor `local_acc` is declared.
// Building with -DUSE_DEPRECATED_LOCAL_ACC picks the older
// accessor<..., access::target::local> spelling; otherwise local_accessor is
// used. Both branches declare the same name with the same range.
#ifdef USE_DEPRECATED_LOCAL_ACC
accessor<sycl::cl_int, 1, access::mode::read_write, access::target::local>
local_acc(range<1>(16), cgh);
#else
local_accessor<sycl::cl_int, 1> local_acc(range<1>(16), cgh);
#endif

// Create work-groups with 16 work items in each group.
auto myRange = nd_range<1>(range<1>(nElems), range<1>(workGroupSize));
Expand Down
3 changes: 1 addition & 2 deletions SYCL/Basic/group_async_copy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,7 @@ template <typename T> int test(size_t Stride) {
Q.submit([&](handler &CGH) {
auto In = InBuf.template get_access<access::mode::read>(CGH);
auto Out = OutBuf.template get_access<access::mode::write>(CGH);
accessor<T, 1, access::mode::read_write, access::target::local> Local(
range<1>{WorkGroupSize}, CGH);
local_accessor<T, 1> Local(range<1>{WorkGroupSize}, CGH);

nd_range<1> NDR{range<1>(NElems), range<1>(WorkGroupSize)};
CGH.parallel_for<KernelName<T>>(NDR, [=](nd_item<1> NDId) {
Expand Down
7 changes: 2 additions & 5 deletions SYCL/Basic/multi_ptr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,7 @@ template <typename T> void testMultPtr() {
accessor<T, 1, access::mode::read_write, access::target::device,
access::placeholder::false_t>
accessorData_2(bufferData_2, cgh);
accessor<T, 1, access::mode::read_write, access::target::local>
localAccessor(numOfItems, cgh);
local_accessor<T, 1> localAccessor(numOfItems, cgh);

cgh.parallel_for<class testMultPtrKernel<T>>(range<1>{10}, [=](id<1> wiID) {
auto ptr_1 = make_ptr<T, access::address_space::global_space>(
Expand Down Expand Up @@ -136,9 +135,7 @@ template <typename T> void testMultPtrArrowOperator() {
accessor<point<T>, 1, access::mode::read, access::target::constant_buffer,
access::placeholder::false_t>
accessorData_2(bufferData_2, cgh);
accessor<point<T>, 1, access::mode::read_write, access::target::local,
access::placeholder::false_t>
accessorData_3(1, cgh);
local_accessor<point<T>, 1> accessorData_3(1, cgh);
accessor<point<T>, 1, access::mode::read, access::target::device,
access::placeholder::false_t>
accessorData_4(bufferData_4, cgh);
Expand Down
3 changes: 1 addition & 2 deletions SYCL/DeviceLib/ITTAnnotations/barrier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ int main() {
// ITT start/finish annotations and ITT wg_barrier/wi_resume annotations.
q.submit([&](handler &cgh) {
auto acc = buf.get_access<access::mode::read_write>(cgh);
accessor<int, 1, access::mode::read_write, access::target::local>
local_acc(local_range, cgh);
local_accessor<int, 1> local_acc(local_range, cgh);
cgh.parallel_for<class simple_barrier_kernel>(
nd_range<1>(num_items, local_range), [=](nd_item<1> item) {
size_t idx = item.get_global_linear_id();
Expand Down
5 changes: 1 addition & 4 deletions SYCL/DeviceLib/string_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -395,10 +395,7 @@ bool kernel_test_memcpy_addr_space(sycl::queue &deviceQueue) {
sycl::access::placeholder::false_t>
src_acc(buffer1, cgh);

sycl::accessor<char, 1, sycl::access::mode::read_write,
sycl::access::target::local,
sycl::access::placeholder::false_t>
local_acc(sycl::range<1>(16), cgh);
sycl::local_accessor<char, 1> local_acc(sycl::range<1>(16), cgh);

sycl::accessor<char, 1, sycl::access::mode::write,
sycl::access::target::device,
Expand Down
5 changes: 1 addition & 4 deletions SYCL/DiscardEvents/discard_events_accessors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,7 @@ int main(int Argc, const char *Argv[]) {
RunKernelHelper(Q, [&](int *Harray) {
Q.submit([&](sycl::handler &CGH) {
const size_t LocalMemSize = BUFFER_SIZE;
using LocalAccessor =
sycl::accessor<int, 1, sycl::access::mode::read_write,
sycl::access::target::local>;
LocalAccessor LocalAcc(LocalMemSize, CGH);
sycl::local_accessor<int, 1> LocalAcc(LocalMemSize, CGH);

CGH.parallel_for<class kernel_using_local_memory>(
Range, [=](sycl::item<1> itemID) {
Expand Down
8 changes: 2 additions & 6 deletions SYCL/GroupAlgorithm/SYCL2020/sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,7 @@ int test_sort_over_group(sycl::queue &q, std::size_t local,
<< std::endl;
q.submit([&](sycl::handler &h) {
auto aI1 = sycl::accessor(bufI1, h);
sycl::accessor<std::byte, 1, sycl::access_mode::read_write,
sycl::access::target::local>
scratch({local_memory_size}, h);
sycl::local_accessor<std::byte, 1> scratch({local_memory_size}, h);

h.parallel_for<sort_over_group_kernel_name<int_wrapper<dim>, T, Compare>>(
sycl::nd_range<dim>(local_range, local_range),
Expand Down Expand Up @@ -167,9 +165,7 @@ int test_joint_sort(sycl::queue &q, std::size_t n_items, std::size_t local,
<< std::endl;
q.submit([&](sycl::handler &h) {
auto aI1 = sycl::accessor(bufI1, h);
sycl::accessor<std::byte, 1, sycl::access_mode::read_write,
sycl::access::target::local>
scratch({local_memory_size}, h);
sycl::local_accessor<std::byte, 1> scratch({local_memory_size}, h);

h.parallel_for<joint_sort_kernel_name<T, Compare>>(
sycl::nd_range<1>{{n_groups * local}, {local}},
Expand Down
12 changes: 4 additions & 8 deletions SYCL/GroupAlgorithm/barrier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,8 @@ void basic() {

q.submit([&](handler &cgh) {
auto acc = buf.get_access<access::mode::read_write>(cgh);
accessor<int, 1, access::mode::read_write, access::target::local> loc(
N, cgh);
accessor<barrier, 1, access::mode::read_write, access::target::local>
loc_barrier(2, cgh);
local_accessor<int, 1> loc(N, cgh);
local_accessor<barrier, 1> loc_barrier(2, cgh);
cgh.parallel_for(nd_range<1>(N, N), [=](nd_item<1> item) {
size_t idx = item.get_local_linear_id();
loc[idx] = acc[idx];
Expand Down Expand Up @@ -69,10 +67,8 @@ void interface() {
auto data_acc = data_buf.get_access<access::mode::read_write>(cgh);
auto test1_acc = test1_buf.get_access<access::mode::read_write>(cgh);
auto test2_acc = test2_buf.get_access<access::mode::read_write>(cgh);
accessor<int, 1, access::mode::read_write, access::target::local> loc(
N, cgh);
accessor<barrier, 1, access::mode::read_write, access::target::local>
loc_barrier(2, cgh);
local_accessor<int, 1> loc(N, cgh);
local_accessor<barrier, 1> loc_barrier(2, cgh);
cgh.parallel_for(nd_range<1>(N, N), [=](nd_item<1> item) {
size_t idx = item.get_local_linear_id();
if (idx == 0) {
Expand Down
4 changes: 1 addition & 3 deletions SYCL/Regression/group.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,7 @@ bool group__async_work_group_copy() {

Q.submit([&](handler &cgh) {
auto AccGlobal = Buf.get_access<access::mode::read_write>(cgh);
accessor<DataType, DIMS, access::mode::read_write,
access::target::local>
AccLocal(LocalRange, cgh);
local_accessor<DataType, DIMS> AccLocal(LocalRange, cgh);

cgh.parallel_for<class group__async_work_group_copy>(
nd_range<2>{GlobalRange, LocalRange}, [=](nd_item<DIMS> I) {
Expand Down
6 changes: 2 additions & 4 deletions SYCL/Regression/local-arg-align.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,8 @@ int main(int argc, char *argv[]) {

q.submit([&](sycl::handler &h) {
// Use two local buffers, one with an int and one with a double4
accessor<cl_int, 1, access::mode::read_write, access::target::local> a(1,
h);
accessor<double4, 1, access::mode::read_write, access::target::local> b(1,
h);
local_accessor<cl_int, 1> a(1, h);
local_accessor<double4, 1> b(1, h);

auto ares = res.get_access<access::mode::read_write>(h);

Expand Down
4 changes: 1 addition & 3 deletions SYCL/Regression/zero_size_local_accessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@
int main() {
sycl::queue Q;
Q.submit([&](sycl::handler &CGH) {
sycl::accessor<uint8_t, 1, sycl::access_mode::read_write,
sycl::access::target::local>
ZeroSizeLocalAcc(sycl::range<1>(0), CGH);
sycl::local_accessor<uint8_t, 1> ZeroSizeLocalAcc(sycl::range<1>(0), CGH);
CGH.single_task([=]() {
if (ZeroSizeLocalAcc.get_range()[0])
ZeroSizeLocalAcc[0] = 1;
Expand Down
6 changes: 2 additions & 4 deletions SYCL/SubGroup/load_store.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,7 @@ template <typename T, int N> void check(queue &Queue) {
Queue.submit([&](handler &cgh) {
auto acc = syclbuf.template get_access<access::mode::read_write>(cgh);
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
accessor<T, 1, access::mode::read_write, access::target::local> LocalMem(
{L + max_sg_size * N}, cgh);
local_accessor<T, 1> LocalMem({L + max_sg_size * N}, cgh);
cgh.parallel_for<sycl_subgr<T, N>>(NdRange, [=](nd_item<1> NdItem) {
ext::oneapi::sub_group SG = NdItem.get_sub_group();
auto SGid = SG.get_group_id().get(0);
Expand Down Expand Up @@ -132,8 +131,7 @@ template <typename T> void check(queue &Queue) {
Queue.submit([&](handler &cgh) {
auto acc = syclbuf.template get_access<access::mode::read_write>(cgh);
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
accessor<T, 1, access::mode::read_write, access::target::local> LocalMem(
{L}, cgh);
local_accessor<T, 1> LocalMem({L}, cgh);
cgh.parallel_for<sycl_subgr<T, 0>>(NdRange, [=](nd_item<1> NdItem) {
ext::oneapi::sub_group SG = NdItem.get_sub_group();
if (NdItem.get_global_id(0) == 0)
Expand Down
10 changes: 10 additions & 0 deletions SYCL/SubGroup/sub_group_as.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out
//
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DUSE_DEPRECATED_LOCAL_ACC %s -o %t.out
// Sub-groups are not supported on Host
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out
//
// Missing __spirv_GenericCastToPtrExplicit_ToLocal,
// __spirv_SubgroupInvocationId, __spirv_GenericCastToPtrExplicit_ToGlobal,
// __spirv_SubgroupBlockReadINTEL, __assert_fail,
Expand Down Expand Up @@ -36,9 +42,13 @@ int main(int argc, char *argv[]) {
queue.submit([&](sycl::handler &cgh) {
auto global = buf.get_access<sycl::access::mode::read_write,
sycl::access::target::device>(cgh);
// Selects how the N-element local-memory accessor `local` is declared.
// Building with -DUSE_DEPRECATED_LOCAL_ACC picks the older
// sycl::accessor<..., sycl::access::target::local> spelling; otherwise
// sycl::local_accessor is used. Both branches declare the same name and size.
#ifdef USE_DEPRECATED_LOCAL_ACC
sycl::accessor<int, 1, sycl::access::mode::read_write,
sycl::access::target::local>
local(N, cgh);
#else
sycl::local_accessor<int, 1> local(N, cgh);
#endif

cgh.parallel_for<class test>(
sycl::nd_range<1>(N, 32), [=](sycl::nd_item<1> it) {
Expand Down
11 changes: 11 additions & 0 deletions SYCL/SubGroup/sub_group_as_vec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out
//
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DUSE_DEPRECATED_LOCAL_ACC %s -o %t.out
// Sub-groups are not supported on Host
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out
//
//
// Missing __spirv_GenericCastToPtrExplicit_ToLocal,
// __spirv_SubgroupLocalInvocationId, __spirv_GenericCastToPtrExplicit_ToGlobal,
// __spirv_SubgroupBlockReadINTEL, __assert_fail,
Expand Down Expand Up @@ -38,9 +45,13 @@ int main(int argc, char *argv[]) {
queue.submit([&](sycl::handler &cgh) {
auto global = buf.get_access<sycl::access::mode::read_write,
sycl::access::target::device>(cgh);
// Selects how the N-element local-memory accessor `local` is declared.
// The guard must test USE_DEPRECATED_LOCAL_ACC: the compiler flag
// -DUSE_DEPRECATED_LOCAL_ACC defines the macro *without* the leading "D", so
// checking DUSE_DEPRECATED_LOCAL_ACC would leave the older
// sycl::accessor<..., sycl::access::target::local> branch permanently unbuilt.
#ifdef USE_DEPRECATED_LOCAL_ACC
sycl::accessor<sycl::vec<int, 2>, 1, sycl::access::mode::read_write,
sycl::access::target::local>
local(N, cgh);
#else
sycl::local_accessor<sycl::vec<int, 2>, 1> local(N, cgh);
#endif
cgh.parallel_for<class test>(
sycl::nd_range<1>(N, 32), [=](sycl::nd_item<1> it) {
sycl::ext::oneapi::sub_group sg = it.get_sub_group();
Expand Down
Loading