Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 0dc4242

Browse files
committed
[SYCL] Add basic tests for non-uniform groups
Tests the ability to create an instance of each new group type, and the correctness of the core member functions. Signed-off-by: John Pennycook <john.pennycook@intel.com>
1 parent 0e64964 commit 0dc4242

File tree

6 files changed

+286
-0
lines changed

6 files changed

+286
-0
lines changed
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
3+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4+
//
5+
// UNSUPPORTED: cuda || hip
6+
7+
#include <sycl/sycl.hpp>
8+
#include <vector>
9+
namespace syclex = sycl::ext::oneapi::experimental;
10+
11+
class TestKernel;
12+
13+
int main() {
14+
sycl::queue Q;
15+
16+
auto SGSizes = Q.get_device().get_info<sycl::info::device::sub_group_sizes>();
17+
if (std::find(SGSizes.begin(), SGSizes.end(), 32) == SGSizes.end()) {
18+
std::cout << "Test skipped due to missing support for sub-group size 32."
19+
<< std::endl;
20+
return 0;
21+
}
22+
23+
sycl::buffer<bool, 1> MatchBuf{sycl::range{32}};
24+
sycl::buffer<bool, 1> LeaderBuf{sycl::range{32}};
25+
26+
const auto NDR = sycl::nd_range<1>{32, 32};
27+
Q.submit([&](sycl::handler &CGH) {
28+
sycl::accessor MatchAcc{MatchBuf, CGH, sycl::write_only};
29+
sycl::accessor LeaderAcc{LeaderBuf, CGH, sycl::write_only};
30+
const auto KernelFunc =
31+
[=](sycl::nd_item<1> item) [[sycl::reqd_sub_group_size(32)]] {
32+
auto WI = item.get_global_id();
33+
auto SG = item.get_sub_group();
34+
35+
// Split into odd and even work-items
36+
bool Predicate = item.get_global_id() % 2 == 0;
37+
auto BallotGroup = syclex::get_ballot_group(SG, Predicate);
38+
39+
// Check function return values match Predicate
40+
bool Match = true;
41+
auto GroupID = (Predicate) ? 1 : 0;
42+
Match &= (BallotGroup.get_group_id() == GroupID);
43+
Match &= (BallotGroup.get_local_id() == SG.get_local_id() / 2);
44+
Match &= (BallotGroup.get_group_range() == 2);
45+
Match &= (BallotGroup.get_local_range() == 16);
46+
MatchAcc[WI] = Match;
47+
LeaderAcc[WI] = BallotGroup.leader();
48+
};
49+
CGH.parallel_for<TestKernel>(NDR, KernelFunc);
50+
});
51+
52+
sycl::host_accessor MatchAcc{MatchBuf, sycl::read_only};
53+
sycl::host_accessor LeaderAcc{LeaderBuf, sycl::read_only};
54+
for (int WI = 0; WI < 32; ++WI) {
55+
assert(MatchAcc[WI] == true);
56+
assert(LeaderAcc[WI] == (WI < 2));
57+
}
58+
return 0;
59+
}
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
3+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4+
//
5+
// UNSUPPORTED: cuda || hip
6+
7+
#include <sycl/sycl.hpp>
8+
#include <vector>
9+
namespace syclex = sycl::ext::oneapi::experimental;
10+
11+
template <size_t ClusterSize> class TestKernel;
12+
13+
template <size_t ClusterSize> void test() {
14+
sycl::queue Q;
15+
16+
auto SGSizes = Q.get_device().get_info<sycl::info::device::sub_group_sizes>();
17+
if (std::find(SGSizes.begin(), SGSizes.end(), 32) == SGSizes.end()) {
18+
std::cout << "Test skipped due to missing support for sub-group size 32."
19+
<< std::endl;
20+
}
21+
22+
sycl::buffer<bool, 1> MatchBuf{sycl::range{32}};
23+
sycl::buffer<bool, 1> LeaderBuf{sycl::range{32}};
24+
25+
const auto NDR = sycl::nd_range<1>{32, 32};
26+
Q.submit([&](sycl::handler &CGH) {
27+
sycl::accessor MatchAcc{MatchBuf, CGH, sycl::write_only};
28+
sycl::accessor LeaderAcc{LeaderBuf, CGH, sycl::write_only};
29+
const auto KernelFunc =
30+
[=](sycl::nd_item<1> item) [[sycl::reqd_sub_group_size(32)]] {
31+
auto WI = item.get_global_id();
32+
auto SG = item.get_sub_group();
33+
34+
auto ClusterGroup = syclex::get_cluster_group<ClusterSize>(SG);
35+
36+
bool Match = true;
37+
Match &= (ClusterGroup.get_group_id() == (WI / ClusterSize));
38+
Match &= (ClusterGroup.get_local_id() == (WI % ClusterSize));
39+
Match &= (ClusterGroup.get_group_range() == (32 / ClusterSize));
40+
Match &= (ClusterGroup.get_local_range() == ClusterSize);
41+
MatchAcc[WI] = Match;
42+
LeaderAcc[WI] = ClusterGroup.leader();
43+
};
44+
CGH.parallel_for<TestKernel<ClusterSize>>(NDR, KernelFunc);
45+
});
46+
47+
sycl::host_accessor MatchAcc{MatchBuf, sycl::read_only};
48+
sycl::host_accessor LeaderAcc{LeaderBuf, sycl::read_only};
49+
for (int WI = 0; WI < 32; ++WI) {
50+
assert(MatchAcc[WI] == true);
51+
assert(LeaderAcc[WI] == ((WI % ClusterSize) == 0));
52+
}
53+
}
54+
55+
int main() {
56+
test<1>();
57+
test<2>();
58+
test<4>();
59+
test<8>();
60+
test<16>();
61+
test<32>();
62+
return 0;
63+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// RUN: %clangxx -fsycl fsyntax-only -fsycl-targets=%sycl_triple %s -o %t.out
2+
3+
#include <sycl/sycl.hpp>
4+
namespace syclex = sycl::ext::oneapi::experimental;
5+
6+
#ifdef SYCL_EXT_ONEAPI_ROOT_GROUP
7+
static_assert(syclex::is_fixed_topology_group_v<syclex::root_group>);
8+
#endif
9+
static_assert(syclex::is_fixed_topology_group_v<sycl::group<1>>);
10+
static_assert(syclex::is_fixed_topology_group_v<sycl::group<2>>);
11+
static_assert(syclex::is_fixed_topology_group_v<sycl::group<3>>);
12+
static_assert(syclex::is_fixed_topology_group_v<sycl::sub_group>);
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// RUN: %clangxx -fsycl fsyntax-only -fsycl-targets=%sycl_triple %s -o %t.out
2+
3+
#include <sycl/sycl.hpp>
4+
namespace syclex = sycl::ext::oneapi::experimental;
5+
6+
static_assert(
7+
syclex::is_user_constructed_group_v<syclex::ballot_group<sycl::sub_group>>);
8+
static_assert(syclex::is_user_constructed_group_v<
9+
syclex::cluster_group<1, sycl::sub_group>>);
10+
static_assert(syclex::is_user_constructed_group_v<
11+
syclex::cluster_group<2, sycl::sub_group>>);
12+
static_assert(
13+
syclex::is_user_constructed_group_v<syclex::tangle_group<sycl::sub_group>>);
14+
static_assert(syclex::is_user_constructed_group_v<syclex::opportunistic_group>);
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
3+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4+
//
5+
// UNSUPPORTED: cuda || hip
6+
7+
#include <sycl/sycl.hpp>
8+
#include <vector>
9+
namespace syclex = sycl::ext::oneapi::experimental;
10+
11+
class TestKernel;
12+
13+
int main() {
14+
sycl::queue Q;
15+
16+
auto SGSizes = Q.get_device().get_info<sycl::info::device::sub_group_sizes>();
17+
if (std::find(SGSizes.begin(), SGSizes.end(), 32) == SGSizes.end()) {
18+
std::cout << "Test skipped due to missing support for sub-group size 32."
19+
<< std::endl;
20+
return 0;
21+
}
22+
23+
sycl::buffer<bool, 1> MatchBuf{sycl::range{32}};
24+
sycl::buffer<bool, 1> LeaderBuf{sycl::range{32}};
25+
26+
const auto NDR = sycl::nd_range<1>{32, 32};
27+
Q.submit([&](sycl::handler &CGH) {
28+
sycl::accessor MatchAcc{MatchBuf, CGH, sycl::write_only};
29+
sycl::accessor LeaderAcc{LeaderBuf, CGH, sycl::write_only};
30+
const auto KernelFunc =
31+
[=](sycl::nd_item<1> item) [[sycl::reqd_sub_group_size(32)]] {
32+
auto WI = item.get_global_id();
33+
auto SG = item.get_sub_group();
34+
35+
// Due to the unpredictable runtime behavior of opportunistic groups,
36+
// some values may change from run to run. Check they're in expected
37+
// ranges and consistent with other groups.
38+
if (item.get_global_id() % 2 == 0) {
39+
auto OpportunisticGroup =
40+
syclex::this_kernel::get_opportunistic_group();
41+
42+
bool Match = true;
43+
Match &= (OpportunisticGroup.get_group_id() == 0);
44+
Match &= (OpportunisticGroup.get_local_id() <
45+
OpportunisticGroup.get_local_range());
46+
Match &= (OpportunisticGroup.get_group_range() == 1);
47+
Match &= (OpportunisticGroup.get_local_linear_range() <=
48+
SG.get_local_linear_range());
49+
MatchAcc[WI] = Match;
50+
LeaderAcc[WI] = OpportunisticGroup.leader();
51+
}
52+
};
53+
CGH.parallel_for<TestKernel>(NDR, KernelFunc);
54+
});
55+
56+
sycl::host_accessor MatchAcc{MatchBuf, sycl::read_only};
57+
sycl::host_accessor LeaderAcc{LeaderBuf, sycl::read_only};
58+
uint32_t NumLeaders = 0;
59+
for (int WI = 0; WI < 32; ++WI) {
60+
if (WI % 2 == 0) {
61+
assert(MatchAcc[WI] == true);
62+
if (LeaderAcc[WI]) {
63+
NumLeaders++;
64+
}
65+
}
66+
}
67+
assert(NumLeaders > 0);
68+
return 0;
69+
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
3+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4+
//
5+
// UNSUPPORTED: cuda || hip
6+
7+
#include <sycl/sycl.hpp>
8+
#include <vector>
9+
namespace syclex = sycl::ext::oneapi::experimental;
10+
11+
class TestKernel;
12+
13+
int main() {
14+
sycl::queue Q;
15+
16+
auto SGSizes = Q.get_device().get_info<sycl::info::device::sub_group_sizes>();
17+
if (std::find(SGSizes.begin(), SGSizes.end(), 32) == SGSizes.end()) {
18+
std::cout << "Test skipped due to missing support for sub-group size 32."
19+
<< std::endl;
20+
return 0;
21+
}
22+
23+
sycl::buffer<bool, 1> MatchBuf{sycl::range{32}};
24+
sycl::buffer<bool, 1> LeaderBuf{sycl::range{32}};
25+
26+
const auto NDR = sycl::nd_range<1>{32, 32};
27+
Q.submit([&](sycl::handler &CGH) {
28+
sycl::accessor MatchAcc{MatchBuf, CGH, sycl::write_only};
29+
sycl::accessor LeaderAcc{LeaderBuf, CGH, sycl::write_only};
30+
const auto KernelFunc =
31+
[=](sycl::nd_item<1> item) [[sycl::reqd_sub_group_size(32)]] {
32+
auto WI = item.get_global_id();
33+
auto SG = item.get_sub_group();
34+
35+
// Split into odd and even work-items via control flow
36+
// Branches deliberately duplicated to test impact of optimizations
37+
if (item.get_global_id() % 2 == 0) {
38+
auto TangleGroup = syclex::get_tangle_group(SG);
39+
40+
bool Match = true;
41+
Match &= (TangleGroup.get_group_id() == 0);
42+
Match &= (TangleGroup.get_local_id() == SG.get_local_id() / 2);
43+
Match &= (TangleGroup.get_group_range() == 1);
44+
Match &= (TangleGroup.get_local_range() == 16);
45+
MatchAcc[WI] = Match;
46+
LeaderAcc[WI] = TangleGroup.leader();
47+
} else {
48+
auto TangleGroup = syclex::get_tangle_group(SG);
49+
50+
bool Match = true;
51+
Match &= (TangleGroup.get_group_id() == 0);
52+
Match &= (TangleGroup.get_local_id() == SG.get_local_id() / 2);
53+
Match &= (TangleGroup.get_group_range() == 1);
54+
Match &= (TangleGroup.get_local_range() == 16);
55+
MatchAcc[WI] = Match;
56+
LeaderAcc[WI] = TangleGroup.leader();
57+
}
58+
};
59+
CGH.parallel_for<TestKernel>(NDR, KernelFunc);
60+
});
61+
62+
sycl::host_accessor MatchAcc{MatchBuf, sycl::read_only};
63+
sycl::host_accessor LeaderAcc{LeaderBuf, sycl::read_only};
64+
for (int WI = 0; WI < 32; ++WI) {
65+
assert(MatchAcc[WI] == true);
66+
assert(LeaderAcc[WI] == (WI < 2));
67+
}
68+
return 0;
69+
}

0 commit comments

Comments
 (0)