|
21 | 21 | //
|
22 | 22 | // TODO: Test global memory for temporary storage
|
23 | 23 | // TODO: Consider using USM instead of buffers
|
24 |
| -// |
| 24 | +// TODO: Add support for sorting over sub-group for CUDA and HIP BE |
25 | 25 |
|
26 | 26 | #include <sycl/sycl.hpp>
|
27 | 27 |
|
@@ -312,12 +312,18 @@ template <class T> void RunOverType(sycl::queue &Q, size_t DataSize) {
|
312 | 312 | RunSortOVerGroup<UseGroupT::WorkGroup, 1>(Q, Data, Comparator);
|
313 | 313 | RunSortOVerGroup<UseGroupT::WorkGroup, 2>(Q, Data, Comparator);
|
314 | 314 |
|
315 |
| - RunSortOVerGroup<UseGroupT::SubGroup, 1>(Q, Data, Comparator); |
316 |
| - RunSortOVerGroup<UseGroupT::SubGroup, 2>(Q, Data, Comparator); |
317 |
| - |
318 | 315 | RunJointSort<UseGroupT::WorkGroup, 1>(Q, Data, Comparator);
|
319 | 316 | RunJointSort<UseGroupT::WorkGroup, 2>(Q, Data, Comparator);
|
320 | 317 |
|
| 318 | + if (Q.get_backend() == sycl::backend::ext_oneapi_cuda || |
| 319 | + Q.get_backend() == sycl::backend::ext_oneapi_hip) { |
| 320 | + std::cout << "Note! Skipping sub group testing on CUDA BE" << std::endl; |
| 321 | + return; |
| 322 | + } |
| 323 | + |
| 324 | + RunSortOVerGroup<UseGroupT::SubGroup, 1>(Q, Data, Comparator); |
| 325 | + RunSortOVerGroup<UseGroupT::SubGroup, 2>(Q, Data, Comparator); |
| 326 | + |
321 | 327 | RunJointSort<UseGroupT::SubGroup, 1>(Q, Data, Comparator);
|
322 | 328 | RunJointSort<UseGroupT::SubGroup, 2>(Q, Data, Comparator);
|
323 | 329 | };
|
|
0 commit comments