Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions sycl/include/sycl/item.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,12 @@ template <typename TransformedArgType, int Dims, typename KernelType>
class RoundedRangeKernel;
template <typename TransformedArgType, int Dims, typename KernelType>
class RoundedRangeKernelWithKH;

// Forward declaration of the reduction helper that produces an
// item<Dims, false> from a range and an id. It is declared here, ahead of the
// item class template, so that item can name it in a friend declaration.
namespace reduction {
template <int Dims>
item<Dims, false> getDelinearizedItem(range<Dims> Range, id<Dims> Id);
} // namespace reduction
} // namespace detail
template <int dimensions> class id;
template <int dimensions> class range;

/// Identifies an instance of the function object executing at each point
/// in a range.
Expand Down Expand Up @@ -130,6 +133,10 @@ template <int dimensions = 1, bool with_offset = true> class item {
friend class detail::RoundedRangeKernelWithKH;
/// Replaces the extent stored in this item's implementation with \p rnwi.
void set_allowed_range(const range<dimensions> rnwi) {
  MImpl.MExtent = rnwi;
}

// Befriend every instantiation of the reduction helper so that it can reach
// item's non-public members when constructing an item<Dims, false>.
template <int Dims>
friend item<Dims, false>
detail::reduction::getDelinearizedItem(range<Dims> Range, id<Dims> Id);

detail::ItemBase<dimensions, with_offset> MImpl;
};

Expand Down
12 changes: 11 additions & 1 deletion sycl/include/sycl/reduction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2369,8 +2369,18 @@ void reduction_parallel_for(handler &CGH, range<Dims> Range,
size_t Start = GroupStart + NDId.get_local_id(0);
size_t End = GroupEnd;
size_t Stride = NDId.get_local_range(0);
auto GetDelinearized = [&](size_t I) {
auto Id = getDelinearizedId(Range, I);
if constexpr (std::is_invocable_v<decltype(KernelFunc), id<Dims>,
decltype(Reducers)...>)
return Id;
else
// SYCL doesn't provide parallel_for accepting offset in presence of
// reductions, so use with_offset==false.
return reduction::getDelinearizedItem(Range, Id);
};
for (size_t I = Start; I < End; I += Stride)
KernelFunc(getDelinearizedId(Range, I), Reducers...);
KernelFunc(GetDelinearized(I), Reducers...);
};
if constexpr (NumArgs == 2) {
using Reduction = std::tuple_element_t<0, decltype(ReduTuple)>;
Expand Down
5 changes: 5 additions & 0 deletions sycl/include/sycl/reduction_forward.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ enum class strategy : int {
// are limited to those below.
inline void finalizeHandler(handler &CGH);
template <class FunctorTy> void withAuxHandler(handler &CGH, FunctorTy Func);

/// Constructs an item<Dims, false> (i.e. with_offset == false) from \p Range
/// and \p Id.
template <int Dims>
item<Dims, false> getDelinearizedItem(range<Dims> Range, id<Dims> Id) {
  return item<Dims, false>{Range, Id};
}
} // namespace reduction

template <typename KernelName,
Expand Down