Optimized atan2, _softmax, cat, clamp, full, relu, remainder, permute_copy_out ops and updates to use memory_allocator #7567

Open

wants to merge 22 commits into base: main

Changes from 1 commit

Commits (22)
216389c
Adding mean and where ops optimized on HiFi
dijopaul Oct 23, 2024
3d849bb
Merge pull request #14 from dijopaul/main
cad-audio Oct 24, 2024
9b71aed
Adding quantized linear optimized versions for int8 and uint8
dijopaul Nov 6, 2024
07743ab
adding pow, remainder, minimum, maximum operators (#33)
nishpoonia Nov 7, 2024
edc1b3d
Fix for build issue faced in div_mod on old tools
dijopaul Nov 13, 2024
222beee
Merge pull request #15 from dijopaul/main
cad-audio Nov 14, 2024
6e074ec
Merge branch 'main' into main
cad-audio Nov 14, 2024
afca3db
Fix build failure due to merge issue
dijopaul Nov 19, 2024
10a0ee0
Merge branch 'main' into main
mcremon-meta Nov 21, 2024
f1f0bb3
Fixing review comments on PR 6867
dijopaul Nov 22, 2024
f8cf408
Malloc fix (#39)
dijopaul Nov 28, 2024
911021f
Cleaning cmakelist to avoid duplications
dijopaul Dec 2, 2024
18cf518
Fixing lint issues and removing free statements
dijopaul Dec 3, 2024
5e471f2
adding ET_KERNEL_CHECK for allocate_temp_memory (#41)
nishpoonia Dec 23, 2024
6928f95
Merge branch 'main' into main_PR18
dijopaul Jan 9, 2025
991961b
Fixing lint error due to merge
dijopaul Jan 9, 2025
7585ee0
Merge pull request #18 from dijopaul/main_PR18
cad-audio Jan 9, 2025
540243a
Update functions_hifi.yaml
dijopaul Jan 9, 2025
85e7c59
Merge pull request #19 from dijopaul/patch-1
cad-audio Jan 9, 2025
1f681c7
Incorporating review comments: removing nesting to check data type and removing exec_ten uses
nishpoonia Jan 10, 2025
3539f52
clean up
nishpoonia Jan 13, 2025
fe5e7d7
Merge pull request #20 from dijopaul/main_PR18
cad-audio Jan 13, 2025
Incorporating review comments: removing nesting to check data type and removing exec_ten uses
nishpoonia committed Jan 10, 2025
commit 1f681c732c3ba4b2a1bdb18665b9237b74c1f0ac
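
For context on what "removing nesting to check data type" means in practice, the sketch below condenses the before/after dispatch pattern from the op_atan2.cpp diff that follows. The names (apply_bitensor_elementwise_fn, SupportedTensorDtypes, get_compute_type) are taken from the diff itself; this is an illustrative summary of the change, not the literal PR code. The commit also replaces the exec_aten type aliases with their executorch::aten equivalents.

// Before: one nested ET_SWITCH per tensor dtype, so the kernel lambda is
// instantiated for every (a_type, b_type, out_type) combination.
ET_SWITCH_REALHB_TYPES(a_type, ctx, name, CTYPE_A, [&]() {
  ET_SWITCH_REALHB_TYPES(b_type, ctx, name, CTYPE_B, [&]() {
    ET_SWITCH_FLOATH_TYPES(out_type, ctx, name, CTYPE_OUT, [&]() {
      // ... apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(...)
    });
  });
});

// After: a single switch over the promoted compute type; per-tensor dtype
// handling is delegated to apply_bitensor_elementwise_fn through the
// SupportedTensorDtypes arguments.
ET_SWITCH_FLOAT_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
  apply_bitensor_elementwise_fn<CTYPE_COMPUTE, op_name>(
      [](const CTYPE_COMPUTE val_a, const CTYPE_COMPUTE val_b) {
        return std::atan2(val_a, val_b);
      },
      ctx,
      a,
      SupportedTensorDtypes::REALHBBF16,
      b,
      SupportedTensorDtypes::REALHBBF16,
      out,
      SupportedTensorDtypes::FLOATHBF16);
});
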
68 changes: 45 additions & 23 deletions backends/cadence/hifi/operators/op_atan2.cpp
@@ -8,41 +8,66 @@

#include <executorch/backends/cadence/hifi/kernels/kernels.h>
#include <executorch/kernels/portable/cpu/util/broadcast_util.h>
#include <executorch/kernels/portable/cpu/util/elementwise_util.h>
#include <executorch/runtime/kernel/kernel_includes.h>
#include <cmath>

using exec_aten::ScalarType;
using exec_aten::Tensor;
using executorch::aten::ScalarType;
using executorch::aten::Tensor;
using executorch::runtime::isFloatingType;
using executorch::runtime::KernelRuntimeContext;
using executorch::runtime::promoteTypes;
using executorch::runtime::tensors_have_same_dim_order;
using torch::executor::Error;
using torch::executor::resize_to_broadcast_target_size;
using torch::executor::native::utils::apply_bitensor_elementwise_fn;
using torch::executor::native::utils::get_compute_type;
using torch::executor::native::utils::SupportedTensorDtypes;

namespace cadence {
namespace impl {
namespace HiFi {
namespace native {

namespace {

ScalarType get_common_type(ScalarType a_type, ScalarType b_type) {
if (isFloatingType(a_type) && isFloatingType(b_type)) {
return promoteTypes(a_type, b_type);
} else if (isFloatingType(a_type)) {
return a_type;
} else if (isFloatingType(b_type)) {
return b_type;
}
return ScalarType::Float;
}
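
// Examples of the rule above (illustrative only; assumes ExecuTorch's
// standard promoteTypes behavior, e.g. promoteTypes(Float, Double) == Double):
//   get_common_type(Float, Double) -> Double  (both floating: promoted)
//   get_common_type(Float, Long)   -> Float   (only a is floating)
//   get_common_type(Long,  Half)   -> Half    (only b is floating)
//   get_common_type(Int,   Long)   -> Float   (neither floating: default)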

} // namespace

Tensor& atan2_out(
KernelRuntimeContext& ctx,
const Tensor& a,
const Tensor& b,
Tensor& out) {
// Common Dtype
ScalarType common_type = get_common_type(a.scalar_type(), b.scalar_type());

// Check Dim Order
ET_KERNEL_CHECK(
ctx, tensors_have_same_dim_order(a, b, out), InvalidArgument, out);

// Determine output size and resize for dynamic shapes
ET_KERNEL_CHECK(
ctx,
resize_to_broadcast_target_size(a, b, out) == Error::Ok,
InvalidArgument,
out);

ET_KERNEL_CHECK(
ctx, tensors_have_same_dim_order(a, b, out), InvalidArgument, out);

ScalarType a_type = a.scalar_type();
ScalarType b_type = b.scalar_type();
ScalarType out_type = out.scalar_type();

constexpr auto name = "atan2.out";
ScalarType compute_type = get_compute_type(common_type);

static constexpr const char op_name[] = "atan2.out";
constexpr int kNnlibMaxDim = 16;
int a_dim = a.dim(), b_dim = b.dim(), out_dim = out.dim();
bool optimized = true;
@@ -180,21 +205,18 @@ Tensor& atan2_out(
return out;
}

ET_SWITCH_REALHB_TYPES(a_type, ctx, name, CTYPE_A, [&]() {
ET_SWITCH_REALHB_TYPES(b_type, ctx, name, CTYPE_B, [&]() {
ET_SWITCH_FLOATH_TYPES(out_type, ctx, name, CTYPE_OUT, [&]() {
torch::executor::
apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
[](const CTYPE_A val_a, const CTYPE_B val_b) {
CTYPE_OUT casted_a = static_cast<CTYPE_OUT>(val_a);
CTYPE_OUT casted_b = static_cast<CTYPE_OUT>(val_b);
return static_cast<CTYPE_OUT>(std::atan2(casted_a, casted_b));
},
a,
b,
out);
});
});
ET_SWITCH_FLOAT_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
apply_bitensor_elementwise_fn<CTYPE_COMPUTE, op_name>(
[](const CTYPE_COMPUTE val_a, const CTYPE_COMPUTE val_b) {
return std::atan2(val_a, val_b);
},
ctx,
a,
SupportedTensorDtypes::REALHBBF16,
b,
SupportedTensorDtypes::REALHBBF16,
out,
SupportedTensorDtypes::FLOATHBF16);
});

return out;
64 changes: 32 additions & 32 deletions backends/cadence/hifi/operators/op_cat.cpp
@@ -12,9 +12,9 @@

#include <executorch/backends/cadence/hifi/kernels/kernels.h>

using exec_aten::ScalarType;
using exec_aten::Tensor;
using executorch::aten::RuntimeContext;
using executorch::aten::ScalarType;
using executorch::aten::Tensor;
using executorch::runtime::getLeadingDims;
using executorch::runtime::getTrailingDims;
using executorch::runtime::resize_tensor;
@@ -33,6 +33,36 @@ Tensor& cat_out(
exec_aten::ArrayRef<Tensor> tensors,
int64_t dim,
Tensor& out) {
if (dim < 0) {
dim += out.dim();
}

ET_KERNEL_CHECK(ctx, check_cat_args(tensors, dim, out), Internal, out);

Tensor::SizesType
expected_out_size[executorch::runtime::kTensorDimensionLimit];
size_t expected_out_dim = 0;
get_cat_out_target_size(tensors, dim, expected_out_size, &expected_out_dim);

ET_KERNEL_CHECK(
ctx,
resize_tensor(out, {expected_out_size, expected_out_dim}) == Error::Ok,
InvalidArgument,
out);

// Special handling when all inputs are 1D-empty tensors for aten consistency
// In that case, just return an 1D-empty tensor without checking dim
bool all_1d_empty = true;
for (size_t i = 0; i < tensors.size(); ++i) {
if (tensors[i].numel() != 0 || tensors[i].dim() != 1) {
all_1d_empty = false;
break;
}
}
if (all_1d_empty) {
return out;
}

constexpr auto name = "cat.out";
constexpr int kNnlibMaxDim = 16;

@@ -92,36 +122,6 @@ Tensor& cat_out(
return out;
}

if (dim < 0) {
dim += out.dim();
}

ET_KERNEL_CHECK(ctx, check_cat_args(tensors, dim, out), Internal, out);

Tensor::SizesType
expected_out_size[executorch::runtime::kTensorDimensionLimit];
size_t expected_out_dim = 0;
get_cat_out_target_size(tensors, dim, expected_out_size, &expected_out_dim);

ET_KERNEL_CHECK(
ctx,
resize_tensor(out, {expected_out_size, expected_out_dim}) == Error::Ok,
InvalidArgument,
out);

// Special handling when all inputs are 1D-empty tensors for aten consistency
// In that case, just return an 1D-empty tensor without checking dim
bool all_1d_empty = true;
for (size_t i = 0; i < tensors.size(); ++i) {
if (tensors[i].numel() != 0 || tensors[i].dim() != 1) {
all_1d_empty = false;
break;
}
}
if (all_1d_empty) {
return out;
}

const size_t outer = getLeadingDims(out, dim);
const size_t dim_stride = getTrailingDims(out, dim);
const size_t ninputs = tensors.size();