Small reduction clean-ups

ndgrigorian · ndgrigorian · commit 8240c769ac35 · 2023-09-27T17:10:34.000-07:00
Removed unnecessary copies in custom_reduce_over_group by taking the group, the local value, and the operator by const reference

Sequential reduction now converts each input element to the output type before calling the reduction operator (makes the conversion explicit rather than implicit)
diff --git a/dpctl/tensor/libtensor/include/kernels/reductions.hpp b/dpctl/tensor/libtensor/include/kernels/reductions.hpp
@@ -97,7 +97,9 @@ struct SequentialReduction
             const py::ssize_t inp_offset =
                 inp_iter_offset + inp_reduction_offset;
 
-            red_val = reduction_op_(red_val, inp_[inp_offset]);
+            using dpctl::tensor::type_utils::convert_impl;
+            outT val = convert_impl<outT, argT>(inp_[inp_offset]);
+            red_val = reduction_op_(red_val, val);
         }
 
         out_[out_iter_offset] = red_val;
diff --git a/dpctl/tensor/libtensor/include/utils/sycl_utils.hpp b/dpctl/tensor/libtensor/include/utils/sycl_utils.hpp
@@ -132,10 +132,10 @@ size_t choose_workgroup_size(const size_t nelems,
 }
 
 template <typename T, typename GroupT, typename LocAccT, typename OpT>
-T custom_reduce_over_group(GroupT wg,
+T custom_reduce_over_group(const GroupT &wg,
                            LocAccT local_mem_acc,
-                           T local_val,
-                           OpT op)
+                           const T &local_val,
+                           const OpT &op)
 {
     size_t wgs = wg.get_local_linear_range();
     local_mem_acc[wg.get_local_linear_id()] = local_val;

Original file line number	Diff line number	Diff line change
`@@ -97,7 +97,9 @@ struct SequentialReduction`
`97`	`97`	`const py::ssize_t inp_offset =`
`98`	`98`	`inp_iter_offset + inp_reduction_offset;`
`99`	`99`
`100`		`- red_val = reduction_op_(red_val, inp_[inp_offset]);`
	`100`	`+ using dpctl::tensor::type_utils::convert_impl;`
	`101`	`+ outT val = convert_impl<outT, argT>(inp_[inp_offset]);`
	`102`	`+ red_val = reduction_op_(red_val, val);`
`101`	`103`	`}`
`102`	`104`
`103`	`105`	`out_[out_iter_offset] = red_val;`
Original file line number	Diff line number	Diff line change
`@@ -132,10 +132,10 @@ size_t choose_workgroup_size(const size_t nelems,`
`132`	`132`	`}`
`133`	`133`
`134`	`134`	`template <typename T, typename GroupT, typename LocAccT, typename OpT>`
`135`		`-T custom_reduce_over_group(GroupT wg,`
	`135`	`+T custom_reduce_over_group(const GroupT &wg,`
`136`	`136`	`LocAccT local_mem_acc,`
`137`		`- T local_val,`
`138`		`- OpT op)`
	`137`	`+ const T &local_val,`
	`138`	`+ const OpT &op)`
`139`	`139`	`{`
`140`	`140`	`size_t wgs = wg.get_local_linear_range();`
`141`	`141`	`local_mem_acc[wg.get_local_linear_id()] = local_val;`