forked from openvinotoolkit/openvino
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[CPU][ARM] Add ACL executor for Transpose (openvinotoolkit#17322)
* separate executors + add acl executor for transpose * correct axisCast * update transpose executors list * update new changes * enable tests * fix formatting * fixed test shapes and transpose generalization * fixed different signedness error * size_t usage in loop counters * undo unwanted changes * fixed comments * added i8 and fp32 to blocked x86 tests * fixed comments * fixed comments * extracted general reference executor from PermuteKernel * fix mayiuse in JitTransposeExecutorBuilder::isSupported * getDescWithType name refactoring * refactoring * removed 2nd executor creation in transpose node * Moved RefOptimizedTranspose to the top * fixed comments --------- Co-authored-by: Aleksandr Voron <aleksandr.voron@intel.com>
- Loading branch information
Showing
23 changed files
with
1,311 additions
and
697 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
67 changes: 67 additions & 0 deletions
67
src/plugins/intel_cpu/src/nodes/executors/acl/acl_transpose.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
// Copyright (C) 2018-2023 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#include "acl_transpose.hpp" | ||
#include "acl_utils.hpp" | ||
|
||
/// Configures an Arm Compute Library NEPermute function for the requested transpose.
///
/// @param transposeParams  Transpose parameters; only `permuteParams.order` is read here.
///                         An empty order is replaced by the identity permutation.
/// @param srcDescs         Source memory descriptors; only element 0 is used.
/// @param dstDescs         Destination memory descriptors; only element 0 is used.
/// @param attr             Not used by this implementation.
/// @return `true` when NEPermute accepted the configuration and was configured;
///         `false` when the permutation could not be derived or NEPermute::validate rejected it.
bool ov::intel_cpu::ACLTransposeExecutor::init(const ov::intel_cpu::TransposeParams &transposeParams,
                                               const std::vector<MemoryDescPtr> &srcDescs,
                                               const std::vector<MemoryDescPtr> &dstDescs,
                                               const dnnl::primitive_attr &attr) {
    auto inputOrder = transposeParams.permuteParams.order;
    if (inputOrder.empty()) {
        // No explicit order: fall back to 0, 1, ..., rank - 1.
        inputOrder.resize(srcDescs[0]->getShape().getRank());
        std::iota(inputOrder.begin(), inputOrder.end(), 0);
    }
    const size_t rank = inputOrder.size();

    // Axis indices handed to ACL, built in reversed axis order in both branches below.
    std::vector<int> vec;
    vec.reserve(rank);

    auto srcDims = srcDescs[0]->getShape().getStaticDims();
    auto dstDims = dstDescs[0]->getShape().getStaticDims();
    if (srcDescs[0]->hasLayoutType(LayoutType::nspc)) {
        changeLayoutToNhwc(srcDims);
        changeLayoutToNhwc(dstDims);
        // Walk the destination axes from last to first and locate, by extent value, the
        // matching source axis counted from the end of srcDims.
        // NOTE(review): matching by extent is ambiguous when two axes have the same size;
        // the first match from the end is always taken - confirm this is acceptable.
        for (size_t i = rank; i-- > 0;) {
            const auto it = std::find(srcDims.rbegin(), srcDims.rend(), dstDims[i]);
            if (it == srcDims.rend()) {
                // Without this guard an out-of-range axis index would be placed in the permutation.
                DEBUG_LOG("NEPermute: destination dimension ", dstDims[i],
                          " was not found among the source dimensions");
                return false;
            }
            vec.push_back(static_cast<int>(it - srcDims.rbegin()));
        }
    } else {
        for (size_t i = 0; i < rank; ++i) {
            vec.push_back(axisCast(inputOrder[i], rank));
        }
        std::reverse(vec.begin(), vec.end());
    }

    arm_compute::PermutationVector order;
    for (size_t i = 0; i < rank; ++i) {
        order.set(i, vec[i]);
    }

    auto srcTensorInfo = arm_compute::TensorInfo(shapeCast(srcDims), 1,
                                                 precisionToAclDataType(srcDescs[0]->getPrecision()),
                                                 getAclDataLayoutByMemoryDesc(srcDescs[0]));
    auto dstTensorInfo = arm_compute::TensorInfo(shapeCast(dstDims), 1,
                                                 precisionToAclDataType(dstDescs[0]->getPrecision()),
                                                 getAclDataLayoutByMemoryDesc(dstDescs[0]));

    // Ask ACL whether it can run this configuration before touching any executor state.
    arm_compute::Status status = arm_compute::NEPermute::validate(&srcTensorInfo, &dstTensorInfo, order);
    if (!status) {
        DEBUG_LOG("NEPermute validation failed: ", status.error_description());
        return false;
    }

    // Only tensor metadata is initialised here; data pointers are attached in exec().
    srcTensor.allocator()->init(srcTensorInfo);
    dstTensor.allocator()->init(dstTensorInfo);

    acl_permute = std::make_unique<arm_compute::NEPermute>();
    acl_permute->configure(&srcTensor, &dstTensor, order);
    return true;
}
|
||
/// Runs the previously configured NEPermute on the given buffers.
///
/// @param src  Source memory objects; only element 0 is read.
/// @param dst  Destination memory objects; only element 0 is written.
/// @param MB   Not used by this implementation.
void ov::intel_cpu::ACLTransposeExecutor::exec(const std::vector<MemoryCPtr> &src,
                                               const std::vector<MemoryPtr> &dst,
                                               const int MB) {
    auto* srcAllocator = srcTensor.allocator();
    auto* dstAllocator = dstTensor.allocator();

    // Attach the caller-provided buffers to the ACL tensors for the duration of this call.
    srcAllocator->import_memory(src[0]->getData());
    dstAllocator->import_memory(dst[0]->getData());

    acl_permute->run();

    // Detach the buffers again once the permute has run.
    srcAllocator->free();
    dstAllocator->free();
}
57 changes: 57 additions & 0 deletions
57
src/plugins/intel_cpu/src/nodes/executors/acl/acl_transpose.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
// Copyright (C) 2018-2023 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include "nodes/executors/transpose.hpp" | ||
#include "utils/debug_capabilities.h" | ||
|
||
namespace ov { | ||
namespace intel_cpu { | ||
|
||
class ACLTransposeExecutor : public TransposeExecutor { | ||
public: | ||
using TransposeExecutor::TransposeExecutor; | ||
|
||
bool init(const TransposeParams& transposeParams, | ||
const std::vector<MemoryDescPtr>& srcDescs, | ||
const std::vector<MemoryDescPtr>& dstDescs, | ||
const dnnl::primitive_attr &attr) override; | ||
void exec(const std::vector<MemoryCPtr>& src, const std::vector<MemoryPtr>& dst, const int MB) override; | ||
impl_desc_type getImplType() const override { return implType; } | ||
private: | ||
static const impl_desc_type implType = impl_desc_type::acl; | ||
arm_compute::Tensor srcTensor, dstTensor; | ||
std::unique_ptr<arm_compute::NEPermute> acl_permute; | ||
}; | ||
|
||
class ACLTransposeExecutorBuilder : public TransposeExecutorBuilder { | ||
public: | ||
bool isSupported(const TransposeParams& transposeParams, | ||
const std::vector<MemoryDescPtr>& srcDescs, | ||
const std::vector<MemoryDescPtr>& dstDescs) const override { | ||
if (!(srcDescs[0]->hasLayoutType(LayoutType::ncsp) && | ||
dstDescs[0]->hasLayoutType(LayoutType::ncsp)) && | ||
!(srcDescs[0]->hasLayoutType(LayoutType::nspc) && | ||
dstDescs[0]->hasLayoutType(LayoutType::nspc))) { | ||
DEBUG_LOG("NEPermute does not support precisions:", | ||
" src: ", srcDescs[0]->serializeFormat(), | ||
" dst: ", dstDescs[0]->serializeFormat()); | ||
return false; | ||
} | ||
if (srcDescs[0]->getShape().getRank() > 4) { | ||
DEBUG_LOG("NEPermute supports up to 4D input tensor. Passed tensor rank: ", | ||
srcDescs[0]->getShape().getRank()); | ||
return false; | ||
} | ||
return true; | ||
} | ||
|
||
TransposeExecutorPtr makeExecutor(const ExecutorContext::CPtr context) const override { | ||
return std::make_shared<ACLTransposeExecutor>(context); | ||
} | ||
}; | ||
|
||
} // namespace intel_cpu | ||
} // namespace ov |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.