Optimized atan2, _softmax, cat, clamp, full, relu, remainder, permute_copy_out ops and updates to use memory_allocator #7567

Open

wants to merge 22 commits into base: main

Changes from 1 commit

Commits (22)
216389c
Adding mean and where ops optimized on HiFi
dijopaul Oct 23, 2024
3d849bb
Merge pull request #14 from dijopaul/main
cad-audio Oct 24, 2024
9b71aed
Adding quantized linear optimized versions for int8 and uint8
dijopaul Nov 6, 2024
07743ab
adding pow, remainder, minimum, maximum operators (#33)
nishpoonia Nov 7, 2024
edc1b3d
Fix for build issue faced in div_mod on old tools
dijopaul Nov 13, 2024
222beee
Merge pull request #15 from dijopaul/main
cad-audio Nov 14, 2024
6e074ec
Merge branch 'main' into main
cad-audio Nov 14, 2024
afca3db
Fix build failure due to merge issue
dijopaul Nov 19, 2024
10a0ee0
Merge branch 'main' into main
mcremon-meta Nov 21, 2024
f1f0bb3
Fixing review comments on PR 6867
dijopaul Nov 22, 2024
f8cf408
Malloc fix (#39)
dijopaul Nov 28, 2024
911021f
Cleaning cmakelist to avoid duplications
dijopaul Dec 2, 2024
18cf518
Fixing lint issues and removing free statements
dijopaul Dec 3, 2024
5e471f2
adding ET_KERNEL_CHECK for allocate_temp_memory (#41)
nishpoonia Dec 23, 2024
6928f95
Merge branch 'main' into main_PR18
dijopaul Jan 9, 2025
991961b
Fixing lint error due to merge
dijopaul Jan 9, 2025
7585ee0
Merge pull request #18 from dijopaul/main_PR18
cad-audio Jan 9, 2025
540243a
Update functions_hifi.yaml
dijopaul Jan 9, 2025
85e7c59
Merge pull request #19 from dijopaul/patch-1
cad-audio Jan 9, 2025
1f681c7
Incorporating review comments: removing nesting to check data type and removing exec_ten uses
nishpoonia Jan 10, 2025
3539f52
clean up
nishpoonia Jan 13, 2025
fe5e7d7
Merge pull request #20 from dijopaul/main_PR18
cad-audio Jan 13, 2025
Incorporating review comments: removing nesting to check data type and removing exec_ten uses
nishpoonia committed Jan 10, 2025
commit 1f681c732c3ba4b2a1bdb18665b9237b74c1f0ac
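
For context on what "removing nesting to check data type" means in practice, the sketch below condenses the before/after dispatch pattern from the op_atan2.cpp diff that follows. The names (apply_bitensor_elementwise_fn, SupportedTensorDtypes, get_compute_type) are taken from the diff itself; this is an illustrative summary of the change, not the literal PR code. The commit also replaces the exec_aten type aliases with their executorch::aten equivalents.

// Before: one nested ET_SWITCH per tensor dtype, so the kernel lambda is
// instantiated for every (a_type, b_type, out_type) combination.
ET_SWITCH_REALHB_TYPES(a_type, ctx, name, CTYPE_A, [&]() {
  ET_SWITCH_REALHB_TYPES(b_type, ctx, name, CTYPE_B, [&]() {
    ET_SWITCH_FLOATH_TYPES(out_type, ctx, name, CTYPE_OUT, [&]() {
      // ... apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(...)
    });
  });
});

// After: a single switch over the promoted compute type; per-tensor dtype
// handling is delegated to apply_bitensor_elementwise_fn through the
// SupportedTensorDtypes arguments.
ET_SWITCH_FLOAT_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
  apply_bitensor_elementwise_fn<CTYPE_COMPUTE, op_name>(
      [](const CTYPE_COMPUTE val_a, const CTYPE_COMPUTE val_b) {
        return std::atan2(val_a, val_b);
      },
      ctx,
      a,
      SupportedTensorDtypes::REALHBBF16,
      b,
      SupportedTensorDtypes::REALHBBF16,
      out,
      SupportedTensorDtypes::FLOATHBF16);
});
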
68 changes: 45 additions & 23 deletions backends/cadence/hifi/operators/op_atan2.cpp
@@ -8,41 +8,66 @@

#include <executorch/backends/cadence/hifi/kernels/kernels.h>
#include <executorch/kernels/portable/cpu/util/broadcast_util.h>
#include <executorch/kernels/portable/cpu/util/elementwise_util.h>
#include <executorch/runtime/kernel/kernel_includes.h>
#include <cmath>

using exec_aten::ScalarType;
using exec_aten::Tensor;
using executorch::aten::ScalarType;
using executorch::aten::Tensor;
using executorch::runtime::isFloatingType;
using executorch::runtime::KernelRuntimeContext;
using executorch::runtime::promoteTypes;
using executorch::runtime::tensors_have_same_dim_order;
using torch::executor::Error;
using torch::executor::resize_to_broadcast_target_size;
using torch::executor::native::utils::apply_bitensor_elementwise_fn;
using torch::executor::native::utils::get_compute_type;
using torch::executor::native::utils::SupportedTensorDtypes;

namespace cadence {
namespace impl {
namespace HiFi {
namespace native {

namespace {

ScalarType get_common_type(ScalarType a_type, ScalarType b_type) {
if (isFloatingType(a_type) && isFloatingType(b_type)) {
return promoteTypes(a_type, b_type);
} else if (isFloatingType(a_type)) {
return a_type;
} else if (isFloatingType(b_type)) {
return b_type;
}
return ScalarType::Float;
}
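
// Examples of the rule above (illustrative only; assumes ExecuTorch's
// standard promoteTypes behavior, e.g. promoteTypes(Float, Double) == Double):
//   get_common_type(Float, Double) -> Double  (both floating: promoted)
//   get_common_type(Float, Long)   -> Float   (only a is floating)
//   get_common_type(Long,  Half)   -> Half    (only b is floating)
//   get_common_type(Int,   Long)   -> Float   (neither floating: default)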

} // namespace

Tensor& atan2_out(
KernelRuntimeContext& ctx,
const Tensor& a,
const Tensor& b,
Tensor& out) {
// Common Dtype
ScalarType common_type = get_common_type(a.scalar_type(), b.scalar_type());

// Check Dim Order
ET_KERNEL_CHECK(
ctx, tensors_have_same_dim_order(a, b, out), InvalidArgument, out);

// Determine output size and resize for dynamic shapes
ET_KERNEL_CHECK(
ctx,
resize_to_broadcast_target_size(a, b, out) == Error::Ok,
InvalidArgument,
out);

ET_KERNEL_CHECK(
ctx, tensors_have_same_dim_order(a, b, out), InvalidArgument, out);

ScalarType a_type = a.scalar_type();
ScalarType b_type = b.scalar_type();
ScalarType out_type = out.scalar_type();

constexpr auto name = "atan2.out";
ScalarType compute_type = get_compute_type(common_type);

static constexpr const char op_name[] = "atan2.out";
constexpr int kNnlibMaxDim = 16;
int a_dim = a.dim(), b_dim = b.dim(), out_dim = out.dim();
bool optimized = true;
@@ -180,21 +205,18 @@ Tensor& atan2_out(
return out;
}

ET_SWITCH_REALHB_TYPES(a_type, ctx, name, CTYPE_A, [&]() {
ET_SWITCH_REALHB_TYPES(b_type, ctx, name, CTYPE_B, [&]() {
ET_SWITCH_FLOATH_TYPES(out_type, ctx, name, CTYPE_OUT, [&]() {
torch::executor::
apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
[](const CTYPE_A val_a, const CTYPE_B val_b) {
CTYPE_OUT casted_a = static_cast<CTYPE_OUT>(val_a);
CTYPE_OUT casted_b = static_cast<CTYPE_OUT>(val_b);
return static_cast<CTYPE_OUT>(std::atan2(casted_a, casted_b));
},
a,
b,
out);
});
});
ET_SWITCH_FLOAT_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
apply_bitensor_elementwise_fn<CTYPE_COMPUTE, op_name>(
[](const CTYPE_COMPUTE val_a, const CTYPE_COMPUTE val_b) {
return std::atan2(val_a, val_b);
},
ctx,
a,
SupportedTensorDtypes::REALHBBF16,
b,
SupportedTensorDtypes::REALHBBF16,
out,
SupportedTensorDtypes::FLOATHBF16);
});

return out;
64 changes: 32 additions & 32 deletions backends/cadence/hifi/operators/op_cat.cpp
@@ -12,9 +12,9 @@

#include <executorch/backends/cadence/hifi/kernels/kernels.h>

using exec_aten::ScalarType;
using exec_aten::Tensor;
using executorch::aten::RuntimeContext;
using executorch::aten::ScalarType;
using executorch::aten::Tensor;
using executorch::runtime::getLeadingDims;
using executorch::runtime::getTrailingDims;
using executorch::runtime::resize_tensor;
@@ -33,6 +33,36 @@ Tensor& cat_out(
exec_aten::ArrayRef<Tensor> tensors,
int64_t dim,
Tensor& out) {
if (dim < 0) {
dim += out.dim();
}

ET_KERNEL_CHECK(ctx, check_cat_args(tensors, dim, out), Internal, out);

Tensor::SizesType
expected_out_size[executorch::runtime::kTensorDimensionLimit];
size_t expected_out_dim = 0;
get_cat_out_target_size(tensors, dim, expected_out_size, &expected_out_dim);

ET_KERNEL_CHECK(
ctx,
resize_tensor(out, {expected_out_size, expected_out_dim}) == Error::Ok,
InvalidArgument,
out);

// Special handling when all inputs are 1D-empty tensors for aten consistency
// In that case, just return an 1D-empty tensor without checking dim
bool all_1d_empty = true;
for (size_t i = 0; i < tensors.size(); ++i) {
if (tensors[i].numel() != 0 || tensors[i].dim() != 1) {
all_1d_empty = false;
break;
}
}
if (all_1d_empty) {
return out;
}

constexpr auto name = "cat.out";
constexpr int kNnlibMaxDim = 16;

@@ -92,36 +122,6 @@ Tensor& cat_out(
return out;
}

if (dim < 0) {
dim += out.dim();
}

ET_KERNEL_CHECK(ctx, check_cat_args(tensors, dim, out), Internal, out);

Tensor::SizesType
expected_out_size[executorch::runtime::kTensorDimensionLimit];
size_t expected_out_dim = 0;
get_cat_out_target_size(tensors, dim, expected_out_size, &expected_out_dim);

ET_KERNEL_CHECK(
ctx,
resize_tensor(out, {expected_out_size, expected_out_dim}) == Error::Ok,
InvalidArgument,
out);

// Special handling when all inputs are 1D-empty tensors for aten consistency
// In that case, just return an 1D-empty tensor without checking dim
bool all_1d_empty = true;
for (size_t i = 0; i < tensors.size(); ++i) {
if (tensors[i].numel() != 0 || tensors[i].dim() != 1) {
all_1d_empty = false;
break;
}
}
if (all_1d_empty) {
return out;
}

const size_t outer = getLeadingDims(out, dim);
const size_t dim_stride = getTrailingDims(out, dim);
const size_t ninputs = tensors.size();