Skip to content

Commit 5f8b75e

Browse files
committed
[SYCL] Additional fixes to LIT tests per reviewers' comments
Signed-off-by: Vyacheslav N Klochkov <vyacheslav.n.klochkov@intel.com>
1 parent 4d8dc85 commit 5f8b75e

File tree

2 files changed

+33
-17
lines changed

2 files changed

+33
-17
lines changed

sycl/test/reduction/reduction_placeholder.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
// RUN: %GPU_RUN_PLACEHOLDER %t.out
77
// RUN: %ACC_RUN_PLACEHOLDER %t.out
88

9+
// RUNx: env SYCL_DEVICE_TYPE=HOST %t.out
910
// TODO: Enable the test for HOST when it supports intel::reduce() and barrier()
1011

1112
// This test performs basic checks of parallel_for(nd_range, reduction, func)

sycl/test/reduction/reduction_usm.cpp

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
11
// UNSUPPORTED: cuda
22
// Reductions use work-group builtins not yet supported by CUDA.
33

4-
// UNSUPPORTED: linux
5-
// TODO: Enable the test for Linux when CI uses GPU driver 20.06.15619 or newer.
6-
74
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
85
// RUN: %CPU_RUN_PLACEHOLDER %t.out
96
// RUN: %GPU_RUN_PLACEHOLDER %t.out
107
// RUN: %ACC_RUN_PLACEHOLDER %t.out
118

9+
// RUNx: env SYCL_DEVICE_TYPE=HOST %t.out
1210
// TODO: Enable the test for HOST when it supports intel::reduce() and barrier()
1311

1412
// This test performs basic checks of parallel_for(nd_range, reduction, func)
@@ -24,22 +22,29 @@ template <typename T, int Dim, class BinaryOperation>
2422
class SomeClass;
2523

2624
template <typename T, int Dim, class BinaryOperation>
27-
void test(T Identity, size_t WGSize, size_t NWItems) {
25+
void test(T Identity, size_t WGSize, size_t NWItems, usm::alloc AllocType) {
2826
queue Q;
2927
auto Dev = Q.get_device();
30-
if (!Dev.get_info<info::device::usm_shared_allocations>())
28+
29+
if (AllocType == usm::alloc::shared &&
30+
!Dev.get_info<info::device::usm_shared_allocations>())
31+
return;
32+
if (AllocType == usm::alloc::host &&
33+
!Dev.get_info<info::device::usm_host_allocations>())
3134
return;
3235

36+
T *ReduVarPtr = (T *)malloc(sizeof(T), Dev, Q.get_context(), AllocType);
37+
if (ReduVarPtr == nullptr)
38+
return;
39+
*ReduVarPtr = Identity;
40+
3341
// Initialize.
3442
T CorrectOut;
3543
BinaryOperation BOp;
3644

3745
buffer<T, 1> InBuf(NWItems);
3846
initInputData(InBuf, CorrectOut, Identity, BOp, NWItems);
3947

40-
T *ReduVarPtr = (T *)malloc_shared(sizeof(T), Dev, Q.get_context());
41-
*ReduVarPtr = Identity;
42-
4348
// Compute.
4449
Q.submit([&](handler &CGH) {
4550
auto In = InBuf.template get_access<access::mode::read>(CGH);
@@ -61,26 +66,36 @@ void test(T Identity, size_t WGSize, size_t NWItems) {
6166
<< ", Expected value: " << CorrectOut << "\n";
6267
assert(0 && "Wrong value.");
6368
}
69+
6470
free(ReduVarPtr, Q.get_context());
6571
}
6672

73+
template <typename T, int Dim, class BinaryOperation>
74+
void testUSM(T Identity, size_t WGSize, size_t NWItems) {
75+
test<T, Dim, BinaryOperation>(Identity, WGSize, NWItems, usm::alloc::shared);
76+
test<T, Dim, BinaryOperation>(Identity, WGSize, NWItems, usm::alloc::host);
77+
}
78+
6779
int main() {
6880
// fast atomics and fast reduce
69-
test<int, 1, intel::plus<int>>(0, 49, 49 * 5);
70-
test<int, 0, intel::plus<int>>(0, 8, 128);
81+
testUSM<int, 1, intel::plus<int>>(0, 49, 49 * 5);
82+
testUSM<int, 0, intel::plus<int>>(0, 8, 128);
7183

7284
// fast atomics
73-
test<int, 0, intel::bit_or<int>>(0, 7, 7 * 3);
74-
test<int, 1, intel::bit_or<int>>(0, 4, 128);
85+
testUSM<int, 0, intel::bit_or<int>>(0, 7, 7 * 3);
86+
testUSM<int, 1, intel::bit_or<int>>(0, 4, 128);
7587

7688
// fast reduce
77-
test<float, 1, intel::minimum<float>>(std::numeric_limits<float>::max(), 5, 5 * 7);
78-
test<float, 0, intel::maximum<float>>(std::numeric_limits<float>::min(), 4, 128);
89+
testUSM<float, 1, intel::minimum<float>>(
90+
(std::numeric_limits<float>::max)(), 5, 5 * 7);
91+
testUSM<float, 0, intel::maximum<float>>(
92+
(std::numeric_limits<float>::min)(), 4, 128);
7993

8094
// generic algorithm
81-
test<int, 0, std::multiplies<int>>(1, 7, 7 * 5);
82-
test<int, 1, std::multiplies<int>>(1, 8, 16);
83-
test<CustomVec<short>, 0, CustomVecPlus<short>>(CustomVec<short>(0), 8, 8 * 3);
95+
testUSM<int, 0, std::multiplies<int>>(1, 7, 7 * 5);
96+
testUSM<int, 1, std::multiplies<int>>(1, 8, 16);
97+
testUSM<CustomVec<short>, 0, CustomVecPlus<short>>(
98+
CustomVec<short>(0), 8, 8 * 3);
8499

85100
std::cout << "Test passed\n";
86101
return 0;

0 commit comments

Comments
 (0)