Skip to content

Commit f741bdd

Browse files
[SYCL] Write tests following the test plan for the work group memory extension (#15928)
This PR adds feature tests as per the work group memory extension [test plan](https://github.com/intel/llvm/blob/sycl/sycl/test-e2e/WorkGroupMemory/test-plan.md). --------- Co-authored-by: Steffen Larsen <steffen.larsen@intel.com>
1 parent 33fe64c commit f741bdd

File tree

10 files changed

+808
-18
lines changed

10 files changed

+808
-18
lines changed

sycl/include/sycl/ext/oneapi/experimental/work_group_memory.hpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,15 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory
4646
using value_type = std::remove_all_extents_t<DataT>;
4747

4848
private:
49+
// At the moment we do not have a way to set properties nor property values to
50+
// set for work group memory. So, we check here for diagnostic purposes that
51+
// the property list is empty.
52+
// TODO: Remove this function and its occurrences in this file once properties
53+
// have been created for work group memory.
54+
void check_props_empty() const {
55+
static_assert(std::is_same_v<PropertyListT, empty_properties_t> &&
56+
"Work group memory class does not support properties yet!");
57+
}
4958
using decoratedPtr = typename sycl::detail::DecoratedType<
5059
value_type, access::address_space::local_space>::type *;
5160

@@ -62,18 +71,22 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(work_group_memory) work_group_memory
6271
#endif
6372

6473
public:
65-
work_group_memory(const indeterminate_t &) {};
74+
work_group_memory(const indeterminate_t &) { check_props_empty(); };
6675
work_group_memory(const work_group_memory &rhs) = default;
6776
work_group_memory &operator=(const work_group_memory &rhs) = default;
6877
template <typename T = DataT,
6978
typename = std::enable_if_t<!sycl::detail::is_unbounded_array_v<T>>>
7079
work_group_memory(handler &)
71-
: sycl::detail::work_group_memory_impl(sizeof(DataT)) {}
80+
: sycl::detail::work_group_memory_impl(sizeof(DataT)) {
81+
check_props_empty();
82+
}
7283
template <typename T = DataT,
7384
typename = std::enable_if_t<sycl::detail::is_unbounded_array_v<T>>>
7485
work_group_memory(size_t num, handler &)
7586
: sycl::detail::work_group_memory_impl(
76-
num * sizeof(std::remove_extent_t<DataT>)) {}
87+
num * sizeof(std::remove_extent_t<DataT>)) {
88+
check_props_empty();
89+
}
7790
template <access::decorated IsDecorated = access::decorated::no>
7891
multi_ptr<value_type, access::address_space::local_space, IsDecorated>
7992
get_multi_ptr() const {

sycl/test-e2e/WorkGroupMemory/swap_test.cpp renamed to sycl/test-e2e/WorkGroupMemory/basic_usage.cpp

Lines changed: 51 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
#include <sycl/detail/core.hpp>
66
#include <sycl/ext/oneapi/experimental/work_group_memory.hpp>
77
#include <sycl/group_barrier.hpp>
8+
#include <sycl/half_type.hpp>
9+
810
namespace syclexp = sycl::ext::oneapi::experimental;
911

1012
sycl::queue q;
@@ -50,7 +52,9 @@ template <typename T> void swap_scalar(T &a, T &b) {
5052
sycl::nd_range<1> ndr{size, wgsize};
5153
cgh.parallel_for(ndr, [=](sycl::nd_item<1> it) {
5254
syclexp::work_group_memory<T> temp2{syclexp::indeterminate};
53-
temp2 = temp; // temp and temp2 have the same underlying data
55+
temp2 = temp; // temp and temp2 have the same underlying data
56+
assert(&temp2 == &temp); // check that both objects return same
57+
// underlying address after assignment
5458
temp = acc_a[0];
5559
acc_a[0] = acc_b[0];
5660
acc_b[0] = temp2; // safe to use temp2
@@ -86,6 +90,8 @@ template <typename T> void swap_scalar(T &a, T &b) {
8690
assert(a == old_b && b == old_a && "Incorrect swap!");
8791

8892
// Same as above but instead of using multi_ptr, use address-of operator.
93+
// Also verify that get_multi_ptr() returns the same address as address-of
94+
// operator.
8995
{
9096
sycl::buffer<T, 1> buf_a{&a, 1};
9197
sycl::buffer<T, 1> buf_b{&b, 1};
@@ -96,6 +102,7 @@ template <typename T> void swap_scalar(T &a, T &b) {
96102
syclexp::work_group_memory<T> temp2{cgh};
97103
sycl::nd_range<1> ndr{size, wgsize};
98104
cgh.parallel_for(ndr, [=](sycl::nd_item<> it) {
105+
assert(&temp == temp.get_multi_ptr().get());
99106
temp = acc_a[0];
100107
acc_a[0] = acc_b[0];
101108
temp2 = *(&temp);
@@ -294,6 +301,8 @@ void swap_array_2d(T (&a)[N][N], T (&b)[N][N], size_t batch_size) {
294301
temp[i][j] = acc_a[i][j];
295302
acc_a[i][j] = acc_b[i][j];
296303
syclexp::work_group_memory<T[N][N]> temp2{temp};
304+
assert(&temp2 == &temp); // check both objects return same underlying
305+
// address after copy construction.
297306
acc_b[i][j] = temp2[i][j];
298307
});
299308
});
@@ -342,28 +351,28 @@ void swap_array_2d(T (&a)[N][N], T (&b)[N][N], size_t batch_size) {
342351
// so we can verify that each work-item sees the value written by its leader.
343352
// The test also is a sanity check that different work groups get different
344353
// work group memory locations as otherwise we'd have data races.
345-
void coherency(size_t size, size_t wgsize) {
354+
template <typename T> void coherency(size_t size, size_t wgsize) {
346355
q.submit([&](sycl::handler &cgh) {
347-
syclexp::work_group_memory<int> data{cgh};
356+
syclexp::work_group_memory<T> data{cgh};
348357
sycl::nd_range<1> ndr{size, wgsize};
349358
cgh.parallel_for(ndr, [=](sycl::nd_item<1> it) {
350359
if (it.get_group().leader()) {
351-
data = it.get_global_id() / wgsize;
360+
data = T(it.get_global_id() / wgsize);
352361
}
353362
sycl::group_barrier(it.get_group());
354-
assert(data == it.get_global_id() / wgsize);
363+
assert(data == T(it.get_global_id() / wgsize));
355364
});
356365
});
357366
}
358367

359368
constexpr size_t N = 32;
360-
int main() {
361-
int intarr1[N][N];
362-
int intarr2[N][N];
369+
template <typename T> void test() {
370+
T intarr1[N][N];
371+
T intarr2[N][N];
363372
for (int i = 0; i < N; ++i) {
364373
for (int j = 0; j < N; ++j) {
365-
intarr1[i][j] = i + j;
366-
intarr2[i][j] = i * j;
374+
intarr1[i][j] = T(i) + T(j);
375+
intarr2[i][j] = T(i) * T(j);
367376
}
368377
}
369378
for (int i = 0; i < N; ++i) {
@@ -373,10 +382,37 @@ int main() {
373382
swap_array_1d(intarr1[i], intarr2[i], 8);
374383
}
375384
swap_array_2d(intarr1, intarr2, 8);
376-
coherency(N, N / 2);
377-
coherency(N, N / 4);
378-
coherency(N, N / 8);
379-
coherency(N, N / 16);
380-
coherency(N, N / 32);
385+
coherency<T>(N, N / 2);
386+
coherency<T>(N, N / 4);
387+
coherency<T>(N, N / 8);
388+
coherency<T>(N, N / 16);
389+
coherency<T>(N, N / 32);
390+
}
391+
392+
template <typename T> void test_ptr() {
393+
T arr1[N][N];
394+
T arr2[N][N];
395+
for (int i = 0; i < N; ++i) {
396+
for (int j = 0; j < N; ++j) {
397+
swap_scalar(arr1[i][j], arr2[i][j]);
398+
}
399+
swap_array_1d(arr1[i], arr2[i], 8);
400+
}
401+
swap_array_2d(arr1, arr2, 8);
402+
}
403+
404+
int main() {
405+
test<int>();
406+
test<char>();
407+
test<uint16_t>();
408+
if (q.get_device().has(sycl::aspect::fp16))
409+
test<sycl::half>();
410+
test_ptr<float *>();
411+
test_ptr<int *>();
412+
test_ptr<char *>();
413+
test_ptr<uint16_t *>();
414+
if (q.get_device().has(sycl::aspect::fp16))
415+
test_ptr<sycl::half *>();
416+
test_ptr<float *>();
381417
return 0;
382418
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
#pragma once
2+
3+
#include <cassert>
4+
#include <iostream>
5+
#include <sycl/atomic_ref.hpp>
6+
#include <sycl/detail/core.hpp>
7+
#include <sycl/ext/oneapi/experimental/work_group_memory.hpp>
8+
#include <sycl/ext/oneapi/free_function_queries.hpp>
9+
#include <sycl/group_barrier.hpp>
10+
#include <sycl/marray.hpp>
11+
#include <sycl/usm.hpp>
12+
#include <sycl/vector.hpp>
13+
14+
using namespace sycl;
15+
16+
template <typename T> bool check_half_aspect(queue &q) {
17+
if (std::is_same_v<sycl::half, T> &&
18+
!q.get_device().has(sycl::aspect::fp16)) {
19+
std::cout << "Device does not support fp16 aspect. Skipping all tests with "
20+
"sycl::half type!"
21+
<< std::endl;
22+
return false;
23+
}
24+
return true;
25+
}
26+
27+
template <typename T> bool check_double_aspect(queue &q) {
28+
if (std::is_same_v<T, double> && !q.get_device().has(aspect::fp64)) {
29+
std::cout << "Device does not support fp64 aspect. Skipping all tests with "
30+
"double type!"
31+
<< std::endl;
32+
return false;
33+
}
34+
return true;
35+
}
36+
37+
template <typename T> struct S {
38+
T val;
39+
};
40+
41+
template <typename T> struct M {
42+
T val;
43+
};
44+
45+
union U {
46+
S<int> s;
47+
M<int> m;
48+
};
49+
50+
template <typename T>
51+
void sum_helper(sycl::ext::oneapi::experimental::work_group_memory<T[]> mem,
52+
sycl::ext::oneapi::experimental::work_group_memory<T> ret,
53+
size_t WGSIZE) {
54+
for (int i = 0; i < WGSIZE; ++i) {
55+
ret = ret + mem[i];
56+
}
57+
}
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
#pragma once
2+
3+
#include "common.hpp"
4+
#include "common_lambda.hpp"
5+
#include <cassert>
6+
#include <sycl/detail/core.hpp>
7+
#include <sycl/ext/oneapi/experimental/work_group_memory.hpp>
8+
#include <sycl/ext/oneapi/free_function_queries.hpp>
9+
#include <sycl/group_barrier.hpp>
10+
#include <sycl/usm.hpp>
11+
12+
using namespace sycl;
13+
14+
template <typename T>
15+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(
16+
(ext::oneapi::experimental::nd_range_kernel<1>))
17+
void sum(sycl::ext::oneapi::experimental::work_group_memory<T[]> mem, T *buf,
18+
sycl::ext::oneapi::experimental::work_group_memory<T> result,
19+
T expected, size_t WGSIZE, bool UseHelper) {
20+
const auto it = sycl::ext::oneapi::this_work_item::get_nd_item<1>();
21+
size_t local_id = it.get_local_id();
22+
mem[local_id] = buf[local_id];
23+
group_barrier(it.get_group());
24+
if (it.get_group().leader()) {
25+
result = 0;
26+
if (!UseHelper) {
27+
for (int i = 0; i < WGSIZE; ++i) {
28+
result = result + mem[i];
29+
}
30+
} else {
31+
sum_helper(mem, result, WGSIZE);
32+
}
33+
assert(result == expected);
34+
}
35+
}
36+
37+
// Explicit instantiations for the relevant data types.
38+
// These are needed because free function kernel support is not fully
39+
// implemented yet.
40+
// TODO: Remove these once free function kernel support is fully there.
41+
#define SUM(T) \
42+
template void sum<T>( \
43+
sycl::ext::oneapi::experimental::work_group_memory<T[]> mem, T * buf, \
44+
sycl::ext::oneapi::experimental::work_group_memory<T> result, \
45+
T expected, size_t WGSIZE, bool UseHelper);
46+
47+
SUM(int)
48+
SUM(uint16_t)
49+
SUM(half)
50+
SUM(double)
51+
SUM(float)
52+
SUM(char)
53+
SUM(bool)
54+
55+
template <typename T>
56+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(
57+
(ext::oneapi::experimental::nd_range_kernel<1>))
58+
void sum_marray(
59+
sycl::ext::oneapi::experimental::work_group_memory<sycl::marray<T, 16>> mem,
60+
T *buf, sycl::ext::oneapi::experimental::work_group_memory<T> result,
61+
T expected) {
62+
const auto it = sycl::ext::oneapi::this_work_item::get_nd_item<1>();
63+
size_t local_id = it.get_local_id();
64+
constexpr T tolerance = 0.0001;
65+
sycl::marray<T, 16> &data = mem;
66+
data[local_id] = buf[local_id];
67+
group_barrier(it.get_group());
68+
if (it.get_group().leader()) {
69+
result = 0;
70+
for (int i = 0; i < 16; ++i) {
71+
result = result + data[i];
72+
}
73+
assert((result - expected) * (result - expected) <= tolerance);
74+
}
75+
}
76+
77+
// Explicit instantiations for the relevant data types.
78+
#define SUM_MARRAY(T) \
79+
template void sum_marray<T>( \
80+
sycl::ext::oneapi::experimental::work_group_memory<sycl::marray<T, 16>> \
81+
mem, \
82+
T * buf, sycl::ext::oneapi::experimental::work_group_memory<T> result, \
83+
T expected);
84+
85+
SUM_MARRAY(float);
86+
SUM_MARRAY(double);
87+
SUM_MARRAY(half);
88+
89+
template <typename T>
90+
SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(
91+
(ext::oneapi::experimental::nd_range_kernel<1>))
92+
void sum_vec(
93+
sycl::ext::oneapi::experimental::work_group_memory<sycl::vec<T, 16>> mem,
94+
T *buf, sycl::ext::oneapi::experimental::work_group_memory<T> result,
95+
T expected) {
96+
const auto it = sycl::ext::oneapi::this_work_item::get_nd_item<1>();
97+
size_t local_id = it.get_local_id();
98+
constexpr T tolerance = 0.0001;
99+
sycl::vec<T, 16> &data = mem;
100+
data[local_id] = buf[local_id];
101+
group_barrier(it.get_group());
102+
if (it.get_group().leader()) {
103+
result = 0;
104+
for (int i = 0; i < 16; ++i) {
105+
result = result + data[i];
106+
}
107+
assert((result - expected) * (result - expected) <= tolerance);
108+
}
109+
}
110+
111+
// Explicit instantiations for the relevant data types.
112+
#define SUM_VEC(T) \
113+
template void sum_vec<T>( \
114+
sycl::ext::oneapi::experimental::work_group_memory<sycl::vec<T, 16>> \
115+
mem, \
116+
T * buf, sycl::ext::oneapi::experimental::work_group_memory<T> result, \
117+
T expected);
118+
119+
SUM_VEC(float);
120+
SUM_VEC(double);
121+
SUM_VEC(half);
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#pragma once
2+
3+
#include <cassert>
4+
#include <sycl/atomic_ref.hpp>
5+
#include <sycl/detail/core.hpp>
6+
#include <sycl/ext/oneapi/experimental/work_group_memory.hpp>
7+
#include <sycl/ext/oneapi/free_function_queries.hpp>
8+
#include <sycl/group_barrier.hpp>
9+
#include <sycl/marray.hpp>
10+
#include <sycl/usm.hpp>
11+
#include <sycl/vector.hpp>
12+
13+
using namespace sycl;
14+
15+
template <typename T>
16+
void sum_helper(sycl::ext::oneapi::experimental::work_group_memory<T[]> mem,
17+
sycl::ext::oneapi::experimental::work_group_memory<T> ret,
18+
size_t WGSIZE) {
19+
for (int i = 0; i < WGSIZE; ++i) {
20+
ret = ret + mem[i];
21+
}
22+
}

0 commit comments

Comments
 (0)