Skip to content

Commit 70f7543

Browse files
authored
[SYCL][E2E] Rewrite Tests Containing Deprecated Overloads #3 (#16775)
The overloads for single_task and parallel_for in the sycl_ext_oneapi_kernel_properties extension are being deprecated as mentioned in #14785. So I'm rewriting tests containing such overloads so that they can still run after the deprecation. --------- Signed-off-by: Hu, Peisen <peisen.hu@intel.com>
1 parent 10f3889 commit 70f7543

15 files changed

+303
-261
lines changed

sycl/test-e2e/Basic/kernel_max_wg_size.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,16 @@ __attribute__((noinline)) void f(int *result, nd_item<1> &index) {
2929
result[index.get_global_id()] = index.get_global_id();
3030
}
3131

32+
struct KernelFunctor {
33+
int *mResult;
34+
KernelFunctor(int *result) : mResult(result) {}
35+
36+
void operator()(nd_item<1> index) const { f(mResult, index); }
37+
auto get(syclex::properties_tag) const {
38+
return syclex::properties{intelex::grf_size<256>};
39+
}
40+
};
41+
3242
int main() {
3343
queue myQueue;
3444
auto myContext = myQueue.get_context();
@@ -46,11 +56,9 @@ int main() {
4656
nd_range myRange{range{maxWgSize}, range{maxWgSize}};
4757

4858
int *result = sycl::malloc_shared<int>(maxWgSize, myQueue);
49-
syclex::properties kernelProperties{intelex::grf_size<256>};
5059
myQueue.submit([&](handler &cgh) {
5160
cgh.use_kernel_bundle(myBundle);
52-
cgh.parallel_for<MyKernel>(myRange, kernelProperties,
53-
([=](nd_item<1> index) { f(result, index); }));
61+
cgh.parallel_for<MyKernel>(myRange, KernelFunctor(result));
5462
});
5563

5664
myQueue.wait();

sycl/test-e2e/Basic/sub_group_size_prop.cpp

Lines changed: 0 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -44,33 +44,12 @@ void test(queue &Queue, const std::vector<size_t> SupportedSGSizes) {
4444
return;
4545
}
4646

47-
auto Props = ext::oneapi::experimental::properties{
48-
ext::oneapi::experimental::sub_group_size<SGSize>};
49-
5047
nd_range<1> NdRange(SGSize * 4, SGSize * 2);
5148

5249
size_t ReadSubGroupSize = 0;
5350
{
5451
buffer ReadSubGroupSizeBuf(&ReadSubGroupSize, range(1));
5552

56-
Queue.submit([&](handler &CGH) {
57-
accessor ReadSubGroupSizeBufAcc{ReadSubGroupSizeBuf, CGH,
58-
sycl::write_only, sycl::no_init};
59-
60-
CGH.parallel_for<SubGroupKernel<Variant::Function, SGSize>>(
61-
NdRange, Props, [=](nd_item<1> NdItem) {
62-
auto SG = NdItem.get_sub_group();
63-
if (NdItem.get_global_linear_id() == 0)
64-
ReadSubGroupSizeBufAcc[0] = SG.get_local_linear_range();
65-
});
66-
});
67-
}
68-
assert(ReadSubGroupSize == SGSize && "Failed check for function.");
69-
70-
ReadSubGroupSize = 0;
71-
{
72-
buffer ReadSubGroupSizeBuf(&ReadSubGroupSize, range(1));
73-
7453
Queue.submit([&](handler &CGH) {
7554
accessor ReadSubGroupSizeBufAcc{ReadSubGroupSizeBuf, CGH,
7655
sycl::write_only, sycl::no_init};
@@ -81,22 +60,6 @@ void test(queue &Queue, const std::vector<size_t> SupportedSGSizes) {
8160
});
8261
}
8362
assert(ReadSubGroupSize == SGSize && "Failed check for functor.");
84-
85-
ReadSubGroupSize = 0;
86-
{
87-
buffer ReadSubGroupSizeBuf(&ReadSubGroupSize, range(1));
88-
89-
Queue.submit([&](handler &CGH) {
90-
accessor ReadSubGroupSizeBufAcc{ReadSubGroupSizeBuf, CGH,
91-
sycl::write_only, sycl::no_init};
92-
KernelFunctorWithSGSizeProp<SGSize> KernelFunctor{ReadSubGroupSizeBufAcc};
93-
94-
CGH.parallel_for<SubGroupKernel<Variant::Functor, SGSize>>(NdRange, Props,
95-
KernelFunctor);
96-
});
97-
}
98-
assert(ReadSubGroupSize == SGSize &&
99-
"Failed check for functor and properties.");
10063
}
10164

10265
int main() {

sycl/test-e2e/ClusterLaunch/cluster_launch_parallel_for.cpp

Lines changed: 47 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,49 @@
1010

1111
#include <string>
1212

13+
template <int Dim, typename T> struct KernelFunctor {
14+
int *mCorrectResultFlag;
15+
T mClusterLaunchProperty;
16+
sycl::range<Dim> mClusterRange;
17+
KernelFunctor(int *CorrectResultFlag, T ClusterLaunchProperty,
18+
sycl::range<Dim> ClusterRange)
19+
: mCorrectResultFlag(CorrectResultFlag),
20+
mClusterLaunchProperty(ClusterLaunchProperty),
21+
mClusterRange(ClusterRange) {}
22+
23+
void operator()(sycl::nd_item<Dim> It) const {
24+
uint32_t ClusterDimX, ClusterDimY, ClusterDimZ;
25+
// Temporary solution till cluster group class is implemented
26+
#if defined(__SYCL_DEVICE_ONLY__) && defined(__SYCL_CUDA_ARCH__) && \
27+
(__SYCL_CUDA_ARCH__ >= 900)
28+
asm volatile("\n\t"
29+
"mov.u32 %0, %%cluster_nctaid.x; \n\t"
30+
"mov.u32 %1, %%cluster_nctaid.y; \n\t"
31+
"mov.u32 %2, %%cluster_nctaid.z; \n\t"
32+
: "=r"(ClusterDimZ), "=r"(ClusterDimY), "=r"(ClusterDimX));
33+
#endif
34+
if constexpr (Dim == 1) {
35+
if (ClusterDimZ == mClusterRange[0] && ClusterDimY == 1 &&
36+
ClusterDimX == 1) {
37+
*mCorrectResultFlag = 1;
38+
}
39+
} else if constexpr (Dim == 2) {
40+
if (ClusterDimZ == mClusterRange[1] && ClusterDimY == mClusterRange[0] &&
41+
ClusterDimX == 1) {
42+
*mCorrectResultFlag = 1;
43+
}
44+
} else {
45+
if (ClusterDimZ == mClusterRange[2] && ClusterDimY == mClusterRange[1] &&
46+
ClusterDimX == mClusterRange[0]) {
47+
*mCorrectResultFlag = 1;
48+
}
49+
}
50+
}
51+
auto get(sycl::ext::oneapi::experimental::properties_tag) const {
52+
return mClusterLaunchProperty;
53+
}
54+
};
55+
1356
template <int Dim>
1457
int test_cluster_launch_parallel_for(sycl::queue &Queue,
1558
sycl::range<Dim> GlobalRange,
@@ -25,38 +68,10 @@ int test_cluster_launch_parallel_for(sycl::queue &Queue,
2568

2669
Queue
2770
.submit([&](sycl::handler &CGH) {
28-
CGH.parallel_for(sycl::nd_range<Dim>(GlobalRange, LocalRange),
29-
ClusterLaunchProperty, [=](sycl::nd_item<Dim> It) {
30-
uint32_t ClusterDimX, ClusterDimY, ClusterDimZ;
31-
// Temporary solution till cluster group class is implemented
32-
#if defined(__SYCL_DEVICE_ONLY__) && defined(__SYCL_CUDA_ARCH__) && \
33-
(__SYCL_CUDA_ARCH__ >= 900)
34-
asm volatile("\n\t"
35-
"mov.u32 %0, %%cluster_nctaid.x; \n\t"
36-
"mov.u32 %1, %%cluster_nctaid.y; \n\t"
37-
"mov.u32 %2, %%cluster_nctaid.z; \n\t"
38-
: "=r"(ClusterDimZ), "=r"(ClusterDimY),
39-
"=r"(ClusterDimX));
40-
#endif
41-
if constexpr (Dim == 1) {
42-
if (ClusterDimZ == ClusterRange[0] &&
43-
ClusterDimY == 1 && ClusterDimX == 1) {
44-
*CorrectResultFlag = 1;
45-
}
46-
} else if constexpr (Dim == 2) {
47-
if (ClusterDimZ == ClusterRange[1] &&
48-
ClusterDimY == ClusterRange[0] &&
49-
ClusterDimX == 1) {
50-
*CorrectResultFlag = 1;
51-
}
52-
} else {
53-
if (ClusterDimZ == ClusterRange[2] &&
54-
ClusterDimY == ClusterRange[1] &&
55-
ClusterDimX == ClusterRange[0]) {
56-
*CorrectResultFlag = 1;
57-
}
58-
}
59-
});
71+
CGH.parallel_for(
72+
sycl::nd_range<Dim>(GlobalRange, LocalRange),
73+
KernelFunctor<Dim, decltype(ClusterLaunchProperty)>(
74+
CorrectResultFlag, ClusterLaunchProperty, ClusterRange));
6075
})
6176
.wait_and_throw();
6277

sycl/test-e2e/ClusterLaunch/enqueueLaunchCustom_check_event_deps.cpp

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,22 @@ template <typename T> void dummy_kernel(T *Input, int N, sycl::nd_item<1> It) {
2424
#endif
2525
}
2626

27+
template <typename T1, typename T2> struct KernelFunctor {
28+
T1 mAcc;
29+
T2 mClusterLaunchProperty;
30+
KernelFunctor(T2 ClusterLaunchProperty, T1 Acc)
31+
: mClusterLaunchProperty(ClusterLaunchProperty), mAcc(Acc) {}
32+
33+
void operator()(sycl::nd_item<1> It) const {
34+
dummy_kernel(
35+
mAcc.template get_multi_ptr<sycl::access::decorated::yes>().get(), 4096,
36+
It);
37+
}
38+
auto get(sycl::ext::oneapi::experimental::properties_tag) const {
39+
return mClusterLaunchProperty;
40+
}
41+
};
42+
2743
int main() {
2844

2945
std::vector<int> HostArray(4096, -20);
@@ -46,13 +62,8 @@ int main() {
4662
cuda::cluster_size ClusterDims(sycl::range{2});
4763
properties ClusterLaunchProperty{ClusterDims};
4864
auto Acc = Buff.template get_access<sycl::access::mode::read_write>(CGH);
49-
CGH.parallel_for(
50-
sycl::nd_range({4096}, {32}), ClusterLaunchProperty,
51-
[=](sycl::nd_item<1> It) {
52-
dummy_kernel(
53-
Acc.get_multi_ptr<sycl::access::decorated::yes>().get(), 4096,
54-
It);
55-
});
65+
CGH.parallel_for(sycl::nd_range({4096}, {32}),
66+
KernelFunctor(ClusterLaunchProperty, Acc));
5667
});
5768
Queue.submit([&](sycl::handler &CGH) {
5869
auto Acc = Buff.template get_access<sycl::access::mode::read_write>(CGH);

sycl/test-e2e/DeviceCodeSplit/grf.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,15 @@ bool checkResult(const std::vector<float> &A, int Inc) {
6767
return true;
6868
}
6969

70+
template <typename T1, typename T2> struct KernelFunctor {
71+
T1 mPA;
72+
T2 mProp;
73+
KernelFunctor(T1 PA, T2 Prop) : mPA(PA), mProp(Prop) {}
74+
75+
void operator()(id<1> i) const { mPA[i] += 2; }
76+
auto get(properties_tag) const { return mProp; }
77+
};
78+
7079
int main(void) {
7180
constexpr unsigned Size = 32;
7281
constexpr unsigned VL = 16;
@@ -122,8 +131,8 @@ int main(void) {
122131

123132
auto e = q.submit([&](handler &cgh) {
124133
auto PA = bufa.get_access<access::mode::read_write>(cgh);
125-
cgh.parallel_for<class SYCLKernelSpecifiedGRF>(
126-
Size, prop, [=](id<1> i) { PA[i] += 2; });
134+
cgh.parallel_for<class SYCLKernelSpecifiedGRF>(Size,
135+
KernelFunctor(PA, prop));
127136
});
128137
e.wait();
129138
} catch (sycl::exception const &e) {

sycl/test-e2e/Graph/Inputs/sub_group_prop.cpp

Lines changed: 0 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -39,49 +39,13 @@ void test(queue &Queue, const std::vector<size_t> SupportedSGSizes) {
3939
return;
4040
}
4141

42-
auto Props = ext::oneapi::experimental::properties{
43-
ext::oneapi::experimental::sub_group_size<SGSize>};
44-
4542
nd_range<1> NdRange(SGSize * 4, SGSize * 2);
4643

4744
size_t ReadSubGroupSize = 0;
4845
{
4946
buffer ReadSubGroupSizeBuf(&ReadSubGroupSize, range(1));
5047
ReadSubGroupSizeBuf.set_write_back(false);
5148

52-
{
53-
exp_ext::command_graph Graph{
54-
Queue.get_context(),
55-
Queue.get_device(),
56-
{exp_ext::property::graph::assume_buffer_outlives_graph{}}};
57-
58-
add_node(Graph, Queue, [&](handler &CGH) {
59-
accessor ReadSubGroupSizeBufAcc{ReadSubGroupSizeBuf, CGH,
60-
sycl::write_only, sycl::no_init};
61-
62-
CGH.parallel_for<SubGroupKernel<Variant::Function, SGSize>>(
63-
NdRange, Props, [=](nd_item<1> NdItem) {
64-
auto SG = NdItem.get_sub_group();
65-
if (NdItem.get_global_linear_id() == 0)
66-
ReadSubGroupSizeBufAcc[0] = SG.get_local_linear_range();
67-
});
68-
});
69-
70-
auto ExecGraph = Graph.finalize();
71-
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); });
72-
Queue.wait_and_throw();
73-
}
74-
75-
host_accessor HostAcc(ReadSubGroupSizeBuf);
76-
ReadSubGroupSize = HostAcc[0];
77-
}
78-
assert(ReadSubGroupSize == SGSize && "Failed check for function.");
79-
80-
ReadSubGroupSize = 0;
81-
{
82-
buffer ReadSubGroupSizeBuf(&ReadSubGroupSize, range(1));
83-
ReadSubGroupSizeBuf.set_write_back(false);
84-
8549
{
8650
exp_ext::command_graph Graph{
8751
Queue.get_context(),
@@ -107,38 +71,6 @@ void test(queue &Queue, const std::vector<size_t> SupportedSGSizes) {
10771
ReadSubGroupSize = HostAcc[0];
10872
}
10973
assert(ReadSubGroupSize == SGSize && "Failed check for functor.");
110-
111-
ReadSubGroupSize = 0;
112-
{
113-
buffer ReadSubGroupSizeBuf(&ReadSubGroupSize, range(1));
114-
ReadSubGroupSizeBuf.set_write_back(false);
115-
116-
{
117-
exp_ext::command_graph Graph{
118-
Queue.get_context(),
119-
Queue.get_device(),
120-
{exp_ext::property::graph::assume_buffer_outlives_graph{}}};
121-
122-
add_node(Graph, Queue, [&](handler &CGH) {
123-
accessor ReadSubGroupSizeBufAcc{ReadSubGroupSizeBuf, CGH,
124-
sycl::write_only, sycl::no_init};
125-
KernelFunctorWithSGSizeProp<SGSize> KernelFunctor{
126-
ReadSubGroupSizeBufAcc};
127-
128-
CGH.parallel_for<SubGroupKernel<Variant::Functor, SGSize>>(
129-
NdRange, Props, KernelFunctor);
130-
});
131-
132-
auto ExecGraph = Graph.finalize();
133-
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); });
134-
Queue.wait_and_throw();
135-
}
136-
137-
host_accessor HostAcc(ReadSubGroupSizeBuf);
138-
ReadSubGroupSize = HostAcc[0];
139-
}
140-
assert(ReadSubGroupSize == SGSize &&
141-
"Failed check for functor and properties.");
14274
}
14375

14476
int main() {

sycl/test-e2e/VirtualFunctions/misc/group-barrier.cpp

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,24 @@ class MultiplyOp : public BaseOp {
101101
}
102102
};
103103

104+
template <typename T1, typename T2, typename T3> struct KernelFunctor {
105+
T1 mDeviceStorage;
106+
T2 mDataAcc;
107+
T3 mLocalAcc;
108+
KernelFunctor(T1 DeviceStorage, T2 DataAcc, T3 LocalAcc)
109+
: mDeviceStorage(DeviceStorage), mDataAcc(DataAcc), mLocalAcc(LocalAcc) {}
110+
111+
void operator()(sycl::nd_item<1> It) const {
112+
auto *Ptr = mDeviceStorage->template getAs<BaseOp>();
113+
mDataAcc[It.get_global_id()] = Ptr->apply(
114+
mLocalAcc.template get_multi_ptr<sycl::access::decorated::no>().get(),
115+
It.get_group());
116+
}
117+
auto get(oneapi::properties_tag) const {
118+
return oneapi::properties{oneapi::assume_indirect_calls};
119+
}
120+
};
121+
104122
int main() try {
105123
using storage_t = obj_storage_t<SumOp, MultiplyOp>;
106124

@@ -113,7 +131,6 @@ int main() try {
113131
sycl::range G{16};
114132
sycl::range L{4};
115133

116-
constexpr oneapi::properties props{oneapi::assume_indirect_calls};
117134
for (unsigned TestCase = 0; TestCase < 2; ++TestCase) {
118135
sycl::buffer<int> DataStorage(G);
119136

@@ -126,12 +143,8 @@ int main() try {
126143
q.submit([&](sycl::handler &CGH) {
127144
sycl::accessor DataAcc(DataStorage, CGH, sycl::read_write);
128145
sycl::local_accessor<int> LocalAcc(L, CGH);
129-
CGH.parallel_for(sycl::nd_range{G, L}, props, [=](auto It) {
130-
auto *Ptr = DeviceStorage->getAs<BaseOp>();
131-
DataAcc[It.get_global_id()] = Ptr->apply(
132-
LocalAcc.get_multi_ptr<sycl::access::decorated::no>().get(),
133-
It.get_group());
134-
});
146+
CGH.parallel_for(sycl::nd_range{G, L},
147+
KernelFunctor(DeviceStorage, DataAcc, LocalAcc));
135148
}).wait_and_throw();
136149

137150
auto *Ptr = HostStorage.construct</* ret type = */ BaseOp>(TestCase);

0 commit comments

Comments
 (0)