@@ -34,13 +34,18 @@ void test(queue q, InputContainer input, OutputContainer output,
34
34
cgh.parallel_for <SpecializationKernelName>(
35
35
nd_range<1 >(G, G), [=](nd_item<1 > it) {
36
36
group<1 > g = it.get_group ();
37
+ auto sg = it.get_sub_group ();
37
38
int lid = it.get_local_id (0 );
38
39
out[0 ] = reduce_over_group (g, in[lid], binary_op);
39
40
out[1 ] = reduce_over_group (g, in[lid], init, binary_op);
40
41
out[2 ] = joint_reduce (g, in.get_pointer (), in.get_pointer () + N,
41
42
binary_op);
42
43
out[3 ] = joint_reduce (g, in.get_pointer (), in.get_pointer () + N,
43
44
init, binary_op);
45
+ out[4 ] = joint_reduce (sg, in.get_pointer (), in.get_pointer () + N,
46
+ binary_op);
47
+ out[5 ] = joint_reduce (sg, in.get_pointer (), in.get_pointer () + N,
48
+ init, binary_op);
44
49
});
45
50
});
46
51
}
@@ -54,6 +59,10 @@ void test(queue q, InputContainer input, OutputContainer output,
54
59
std::accumulate (input.begin (), input.end (), identity, binary_op));
55
60
assert (output[3 ] ==
56
61
std::accumulate (input.begin (), input.end (), init, binary_op));
62
+ assert (output[4 ] ==
63
+ std::accumulate (input.begin (), input.end (), identity, binary_op));
64
+ assert (output[5 ] ==
65
+ std::accumulate (input.begin (), input.end (), init, binary_op));
57
66
}
58
67
59
68
int main () {
@@ -65,7 +74,7 @@ int main() {
65
74
66
75
constexpr int N = 128 ;
67
76
std::array<int , N> input;
68
- std::array<int , 4 > output;
77
+ std::array<int , 6 > output;
69
78
std::iota (input.begin (), input.end (), 0 );
70
79
std::fill (output.begin (), output.end (), 0 );
71
80
@@ -93,7 +102,7 @@ int main() {
93
102
// sycl::plus binary operation.
94
103
#ifdef SYCL_EXT_ONEAPI_COMPLEX_ALGORITHMS
95
104
std::array<std::complex<float >, N> input_cf;
96
- std::array<std::complex<float >, 4 > output_cf;
105
+ std::array<std::complex<float >, 6 > output_cf;
97
106
std::iota (input_cf.begin (), input_cf.end (), 0 );
98
107
std::fill (output_cf.begin (), output_cf.end (), 0 );
99
108
test<class KernelNamePlusComplexF >(q, input_cf, output_cf,
0 commit comments