Skip to content

Commit 8240c76

Browse files
committed
Small reduction clean-ups
Removed unnecessary copies in custom_reduce_over_group. Sequential reduction now casts before calling the operator (makes the behavior explicit rather than implicit).
1 parent 124b92c commit 8240c76

File tree

2 files changed

+6
-4
lines changed

2 files changed

+6
-4
lines changed

dpctl/tensor/libtensor/include/kernels/reductions.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,9 @@ struct SequentialReduction
9797
const py::ssize_t inp_offset =
9898
inp_iter_offset + inp_reduction_offset;
9999

100-
red_val = reduction_op_(red_val, inp_[inp_offset]);
100+
using dpctl::tensor::type_utils::convert_impl;
101+
outT val = convert_impl<outT, argT>(inp_[inp_offset]);
102+
red_val = reduction_op_(red_val, val);
101103
}
102104

103105
out_[out_iter_offset] = red_val;

dpctl/tensor/libtensor/include/utils/sycl_utils.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -132,10 +132,10 @@ size_t choose_workgroup_size(const size_t nelems,
132132
}
133133

134134
template <typename T, typename GroupT, typename LocAccT, typename OpT>
135-
T custom_reduce_over_group(GroupT wg,
135+
T custom_reduce_over_group(const GroupT &wg,
136136
LocAccT local_mem_acc,
137-
T local_val,
138-
OpT op)
137+
const T &local_val,
138+
const OpT &op)
139139
{
140140
size_t wgs = wg.get_local_linear_range();
141141
local_mem_acc[wg.get_local_linear_id()] = local_val;

0 commit comments

Comments
 (0)