syaifulnizamiphone7
diff --git a/‎src/plugins/intel_cpu/src/nodes/deconv.cpp
+152-43 b/‎src/plugins/intel_cpu/src/nodes/deconv.cpp
+152-43
diff --git a/‎src/plugins/intel_cpu/src/nodes/deconv.h
+8-7 b/‎src/plugins/intel_cpu/src/nodes/deconv.h
+8-7
diff --git a/‎src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.cpp
+248 b/‎src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.cpp
+248
diff --git a/‎src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.hpp
+78 b/‎src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.hpp
+78
diff --git a/‎src/plugins/intel_cpu/src/nodes/executors/deconv.cpp
+13 b/‎src/plugins/intel_cpu/src/nodes/executors/deconv.cpp
+13
@@ -11,6 +11,8 @@
 #include <vector>
 #include "common/dnnl_executor.h"
 
+#include "executors/deconv_list.hpp"
+
 namespace ov {
 namespace intel_cpu {
 namespace node {
@@ -20,6 +22,7 @@ class Deconvolution : public Node {
     Deconvolution(const std::shared_ptr<ngraph::Node>& op, const GraphContext::CPtr context);
 
     void getSupportedDescriptors() override;
+    void initSupportedPrimitiveDescriptors() override;
     void createDescriptor(const std::vector<MemoryDescPtr>& inputDesc,
                           const std::vector<MemoryDescPtr>& outputDesc) override;
     void createPrimitive() override;
@@ -41,7 +44,7 @@ class Deconvolution : public Node {
     bool canFuse(const NodePtr& node) const override;
 
     const VectorDims& getWeightDims() const { return getInputShapeAtPort(1).getStaticDims(); }
-    const std::vector<ptrdiff_t>& getStride() const { return stride; }
+    const std::vector<ptrdiff_t>& getStride() const { return deconvAttrs.stride; }
 
     void prepareParams() override;
     void execute(dnnl::stream strm) override;
@@ -55,6 +58,7 @@ class Deconvolution : public Node {
     AttrPtr initPrimitiveAttr() override;
     AttrPtr makePrimitiveAttr(const VectorDims& dims);
     std::vector<dnnl::memory::format_tag> getAvailableFormatsForDims(const Shape& dims) const override;
+    std::shared_ptr<DeconvExecutor> execPtrDeconv = nullptr;
 
 private:
     using executorPtr = std::shared_ptr<DnnlExecutor>;
@@ -89,16 +93,13 @@ class Deconvolution : public Node {
     size_t groupNum = 1;
     size_t IC = 0;
     size_t OC = 0;
-    std::vector<ptrdiff_t> kernel;
-    std::vector<ptrdiff_t> stride;
-    std::vector<ptrdiff_t> dilation;
-    ov::CoordinateDiff paddingL;
-    ov::CoordinateDiff paddingR;
-    ov::CoordinateDiff outputPadding;
     std::vector<int32_t> lastOutputSpatialDims;
     VectorDims int8WeightDims;
     VectorDims expectedBiasDims {};
 
+    bool useACL = false;
+    DeconvAttrs deconvAttrs;
+
     Shape inShape;
 
     AttrPtr pAttr;
 
@@ -0,0 +1,248 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "acl_deconv.hpp"
+#include "ie_parallel.hpp"
+
+namespace ov {
+namespace intel_cpu {
+
+using namespace arm_compute;
+
+ACLDeconvTensorInfo getACLDeconvTensorInfo(const DeconvAttrs& deconvAttrs,
+                                           const std::vector<MemoryDescPtr>& srcDescs,
+                                           const std::vector<MemoryDescPtr>& dstDescs) {
+    auto srcDims  = srcDescs[0]->getShape().getDims();
+    auto weiDims  = srcDescs[1]->getShape().getDims();
+    // swap input and output channels dimensions to be align with ACL
+    // weights tensor shape is changed because ACL expects [O, I, H, W] tensor while OV uses [I, O, H, W] tensor
+    std::swap(weiDims[0], weiDims[1]);
+    auto dstDims  = dstDescs[0]->getShape().getDims();
+
+    VectorDims biasDims;
+    TensorInfo biasTensorInfo;
+
+    if (deconvAttrs.withBiasesParam) {
+        biasDims = srcDescs[2]->getShape().getStaticDims();
+        biasTensorInfo = TensorInfo(shapeCast(biasDims), 1,
+                                    precisionToAclDataType(srcDescs[2]->getPrecision()), getAclDataLayoutByMemoryDesc(srcDescs[2]));
+    }
+
+    TensorInfo srcTensorInfo = TensorInfo(shapeCast(srcDims), 1,
+                                          precisionToAclDataType(srcDescs[0]->getPrecision()), getAclDataLayoutByMemoryDesc(srcDescs[0]));
+    TensorInfo weiTensorInfo = TensorInfo(shapeCast(weiDims), 1,
+                                          precisionToAclDataType(srcDescs[1]->getPrecision()), getAclDataLayoutByMemoryDesc(srcDescs[1]));
+    TensorInfo dstTensorInfo = TensorInfo(shapeCast(dstDims), 1,
+                                          precisionToAclDataType(dstDescs[0]->getPrecision()), getAclDataLayoutByMemoryDesc(dstDescs[0]));
+
+    unsigned int pad_l =
+            (deconvAttrs.paddingL.size() > 1) ? static_cast<unsigned int>(deconvAttrs.paddingL.at(1)) : static_cast<unsigned int>(deconvAttrs.paddingL.at(0));
+    unsigned int pad_r =
+            (deconvAttrs.paddingR.size() > 1) ? static_cast<unsigned int>(deconvAttrs.paddingR.at(1)) : static_cast<unsigned int>(deconvAttrs.paddingR.at(0));
+    unsigned int pad_t = static_cast<unsigned int>(deconvAttrs.paddingL.at(0));
+    unsigned int pad_b = static_cast<unsigned int>(deconvAttrs.paddingR.at(0));
+    unsigned int stride_x = (deconvAttrs.stride.size() > 1) ? deconvAttrs.stride.at(1) : deconvAttrs.stride.at(0);
+    unsigned int stride_y = deconvAttrs.stride.at(0);
+    PadStrideInfo deconv_info(stride_x, stride_y, pad_l, pad_r, pad_t, pad_b, DimensionRoundingType::FLOOR);
+
+    return ACLDeconvTensorInfo{srcTensorInfo, weiTensorInfo, biasTensorInfo, dstTensorInfo, deconv_info};
+}
+
+AclDeconvExecutor::AclDeconvExecutor(const ExecutorContext::CPtr context) : DeconvExecutor(context) {}
+
+bool AclDeconvExecutor::init(const DeconvAttrs& deconvAttrs,
+                          const std::vector<MemoryDescPtr>& srcDescs,
+                          const std::vector<MemoryDescPtr>& dstDescs,
+                          const dnnl::primitive_attr &attr) {
+    this->deconvAttrs = deconvAttrs;
+    ACLDeconvTensorInfo aclDeconvTensorInfo = getACLDeconvTensorInfo(deconvAttrs, srcDescs, dstDescs);
+    TensorInfo srcTensorInfo = aclDeconvTensorInfo.srcTensorInfo;
+    TensorInfo weiTensorInfo = aclDeconvTensorInfo.weiTensorInfo;
+    TensorInfo biasTensorInfo = aclDeconvTensorInfo.biasTensorInfo;
+    TensorInfo dstTensorInfo = aclDeconvTensorInfo.dstTensorInfo;
+    PadStrideInfo deconv_info = aclDeconvTensorInfo.deconv_info;
+
+    arm_compute::Status status = arm_compute::NEDeconvolutionLayer::validate(&srcTensorInfo,
+                                                                             &weiTensorInfo,
+                                                                             deconvAttrs.withBiasesParam ? &biasTensorInfo : nullptr,
+                                                                             &dstTensorInfo,
+                                                                             deconv_info);
+    if (!status) {
+        DEBUG_LOG("NEDeconvolutionLayer validation failed: ", status.error_description());
+        return false;
+    }
+
+    srcTensor.allocator()->init(srcTensorInfo);
+    weiTensor.allocator()->init(weiTensorInfo);
+    dstTensor.allocator()->init(dstTensorInfo);
+    if (deconvAttrs.withBiasesParam)
+        biasTensor.allocator()->init(biasTensorInfo);
+
+    deconv = std::make_unique<arm_compute::NEDeconvolutionLayer>();
+    deconv->configure(&srcTensor, &weiTensor, deconvAttrs.withBiasesParam ? &biasTensor : nullptr, &dstTensor, deconv_info);
+
+    // weights tensor shape is changed because ACL expects [O, I, H, W] tensor while OV uses [I, O, H, W] tensor
+     weiBuffer = std::vector<float>(srcDescs[1]->getShape().getStaticDims()[0] *
+                                    srcDescs[1]->getShape().getStaticDims()[1] *
+                                    srcDescs[1]->getShape().getStaticDims()[2] *
+                                    srcDescs[1]->getShape().getStaticDims()[3]);
+    return true;
+}
+
+static void transpose_to_1023(const MemoryCPtr& srcMemPtr, std::vector<float>& dst_data) {
+    const auto src_data = reinterpret_cast<float*>(srcMemPtr->getData());
+
+    const int DIM0 = srcMemPtr->getStaticDims()[0];
+    const int DIM1 = srcMemPtr->getStaticDims()[1];
+    const int DIM2 = srcMemPtr->getStaticDims()[2];
+    const int DIM3 = srcMemPtr->getStaticDims()[3];
+
+    parallel_for3d(DIM0, DIM1, DIM2, [&](const int dim0, const int dim1, const int dim2) {
+                for (int dim3 = 0; dim3 < DIM3; ++dim3) {
+                    const int src_off = dim0 * DIM1 * DIM2 * DIM3 +
+                                        dim1 * DIM2 * DIM3 +
+                                        dim2 * DIM3 +
+                                        dim3;
+                    const int dst_off = dim1 * DIM0 * DIM2 * DIM3 +
+                                        dim0 * DIM2 * DIM3 +
+                                        dim2 * DIM3 +
+                                        dim3;
+
+                    dst_data[dst_off] = src_data[src_off];
+                }
+    });
+}
+
+void AclDeconvExecutor::exec(const std::vector<MemoryCPtr>& src, const std::vector<MemoryPtr>& dst, const void *post_ops_data_) {
+    // TODO: Remove transpose from exec
+    transpose_to_1023(src[1], weiBuffer);
+
+    srcTensor.allocator()->import_memory(src[0]->getData());
+    dstTensor.allocator()->import_memory(dst[0]->getData());
+    weiTensor.allocator()->import_memory(weiBuffer.data());
+    if (deconvAttrs.withBiasesParam)
+        biasTensor.allocator()->import_memory(src[2]->getData());
+    deconv->run();
+
+    srcTensor.allocator()->free();
+    dstTensor.allocator()->free();
+    weiTensor.allocator()->free();
+    if (deconvAttrs.withBiasesParam)
+        biasTensor.allocator()->free();
+}
+
+bool AclDeconvExecutorBuilder::customIsSupported(const DeconvAttrs &deconvAttrs,
+                                                 const std::vector<MemoryDescPtr> &srcDescs,
+                                                 const std::vector<MemoryDescPtr> &dstDescs)  {
+    if ((srcDescs[0]->getShape().getDims().size() != 3 && srcDescs[0]->getShape().getDims().size() != 4) ||
+        dstDescs[0]->getShape().getDims().size() != srcDescs[0]->getShape().getDims().size() ||
+        srcDescs[1]->getShape().getDims().size() != 4) {
+        DEBUG_LOG("AclDeconvExecutor does not support dimension:",
+                  " src[0]=", srcDescs[0]->getShape().getDims().size(),
+                  " src[1]=", srcDescs[1]->getShape().getDims().size(),
+                  " dst[0]=", dstDescs[0]->getShape().getDims().size());
+        return false;
+    }
+
+    // TODO: Ticket CVS-114087 - enable FP16 when check FP16 scoup
+    if (!(one_of(srcDescs[0]->getPrecision(), /*InferenceEngine::Precision::FP16, */InferenceEngine::Precision::FP32) &&
+          srcDescs[0]->getPrecision() == srcDescs[1]->getPrecision() &&
+          srcDescs[1]->getPrecision() == dstDescs[0]->getPrecision())) {
+        DEBUG_LOG("AclDeconvExecutor does not support precisions:",
+                  " src[0]=", srcDescs[0]->getPrecision(),
+                  " src[1]=", srcDescs[1]->getPrecision(),
+                  " dst[0]=", dstDescs[0]->getPrecision());
+        return false;
+    }
+
+    if (deconvAttrs.withBiasesParam && srcDescs[2]->getPrecision() != srcDescs[0]->getPrecision()) {
+        DEBUG_LOG("AclDeconvExecutor does not support precisions:",
+                  " src[2]=", srcDescs[2]->getPrecision());
+        return false;
+    }
+
+    if (!(srcDescs[0]->hasLayoutType(LayoutType::ncsp) &&
+          srcDescs[1]->hasLayoutType(LayoutType::ncsp) &&
+          dstDescs[0]->hasLayoutType(LayoutType::ncsp)) &&
+        !(srcDescs[0]->hasLayoutType(LayoutType::nspc) &&
+          srcDescs[1]->hasLayoutType(LayoutType::nspc) &&
+          dstDescs[0]->hasLayoutType(LayoutType::nspc))) {
+        DEBUG_LOG("AclDeconvExecutor does not support layouts:",
+                  " src[0]=", srcDescs[0]->serializeFormat(),
+                  " src[1]=", srcDescs[1]->serializeFormat(),
+                  " dst=", dstDescs[0]->serializeFormat());
+        return false;
+    }
+
+    if (deconvAttrs.withBiasesParam &&
+        !(srcDescs[2]->hasLayoutType(LayoutType::ncsp)) &&
+        !(srcDescs[2]->hasLayoutType(LayoutType::nspc))) {
+        DEBUG_LOG("AclDeconvExecutor does not support layouts:",
+                  " src[0]=", srcDescs[0]->serializeFormat(),
+                  " src[1]=", srcDescs[1]->serializeFormat(),
+                  " src[2]=", srcDescs[2]->serializeFormat(),
+                  " dst=", dstDescs[0]->serializeFormat());
+        return false;
+    }
+
+    ACLDeconvTensorInfo aclDeconvTensorInfo = getACLDeconvTensorInfo(deconvAttrs, srcDescs, dstDescs);
+    TensorInfo srcTensorInfo = aclDeconvTensorInfo.srcTensorInfo;
+    TensorInfo weiTensorInfo = aclDeconvTensorInfo.weiTensorInfo;
+    TensorInfo biasTensorInfo = aclDeconvTensorInfo.biasTensorInfo;
+    TensorInfo dstTensorInfo = aclDeconvTensorInfo.dstTensorInfo;
+    PadStrideInfo deconv_info = aclDeconvTensorInfo.deconv_info;
+
+    unsigned int kernel_x = (deconvAttrs.kernel.size() > 1) ? deconvAttrs.kernel.at(1) : deconvAttrs.kernel.at(0);
+    unsigned int kernel_y = deconvAttrs.kernel.at(0);
+
+    // After stride=8 up-sampling in ACL Deconvolution layer slower than reference
+    if (deconv_info.stride().first >= 8 || deconv_info.stride().second >= 8) return false;
+
+    unsigned int dilation_x = (deconvAttrs.dilation.size() > 1) ? deconvAttrs.dilation.at(1) : deconvAttrs.dilation.at(0);
+    unsigned int dilation_y = deconvAttrs.dilation.at(0);
+    if (!one_of(dilation_x, static_cast<unsigned int >(0), static_cast<unsigned int >(1)) ||
+        !one_of(dilation_y, static_cast<unsigned int >(0), static_cast<unsigned int >(1))) return false;
+
+    size_t in_h = srcDescs[0]->hasLayoutType(LayoutType::ncsp) ? srcDescs[0]->getShape().getDims()[2] : srcDescs[0]->getShape().getDims()[1];
+    size_t in_w = srcDescs[0]->hasLayoutType(LayoutType::ncsp) ? srcDescs[0]->getShape().getDims()[3] : srcDescs[0]->getShape().getDims()[2];
+
+    // Validate function has bug (https://github.com/ARM-software/ComputeLibrary/issues/1061) with error exception.
+    // We copy deconvolution_output_dimensions function for get correct validation
+    // TODO: remove after fix
+    if (validate_deconvolution_output_dimensions(in_w, in_h, kernel_x, kernel_y, deconv_info)) {
+        DEBUG_LOG("NEDeconvolutionLayer arm_compute::deconvolution_output_dimensions failed");
+        return false;
+    }
+
+    arm_compute::Status status = arm_compute::NEDeconvolutionLayer::validate(&srcTensorInfo,
+                                                                             &weiTensorInfo,
+                                                                             deconvAttrs.withBiasesParam ? &biasTensorInfo : nullptr,
+                                                                             &dstTensorInfo,
+                                                                             deconv_info);
+    if (!status) {
+        DEBUG_LOG("NEDeconvolutionLayer validation failed: ", status.error_description());
+        return false;
+    }
+
+    return true;
+}
+
+bool AclDeconvExecutorBuilder::validate_deconvolution_output_dimensions(unsigned int in_width, unsigned int in_height,
+                                                                   unsigned int kernel_width,
+                                                                   unsigned int kernel_height,
+                                                                   const PadStrideInfo &pad_stride_info) {
+    const unsigned int pad_left   = pad_stride_info.pad_left();
+    const unsigned int pad_top    = pad_stride_info.pad_top();
+    const unsigned int pad_right  = pad_stride_info.pad_right();
+    const unsigned int pad_bottom = pad_stride_info.pad_bottom();
+    const unsigned int stride_x   = pad_stride_info.stride().first;
+    const unsigned int stride_y   = pad_stride_info.stride().second;
+
+    if (!((in_width < 1 || in_height < 1) ||
+          (((in_width - 1) * stride_x + kernel_width) < (pad_left + pad_right)) ||
+          (((in_height - 1) * stride_y + kernel_height) < (pad_top + pad_bottom)))) { return false; }
+    return true;
+}
+}   // namespace intel_cpu
+}   // namespace ov
@@ -0,0 +1,78 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "nodes/executors/deconv.hpp"
+#include "arm_compute/runtime/NEON/NEFunctions.h"
+#include "utils/debug_capabilities.h"
+#include "acl_utils.hpp"
+
+namespace ov {
+namespace intel_cpu {
+
+struct ACLDeconvTensorInfo {
+    arm_compute::TensorInfo srcTensorInfo;
+    arm_compute::TensorInfo weiTensorInfo;
+    arm_compute::TensorInfo biasTensorInfo;
+    arm_compute::TensorInfo dstTensorInfo;
+    arm_compute::PadStrideInfo deconv_info;
+};
+
+ACLDeconvTensorInfo getACLDeconvTensorInfo(const DeconvAttrs& deconvAttrs,
+                                       const std::vector<MemoryDescPtr>& srcDescs,
+                                       const std::vector<MemoryDescPtr>& dstDescs);
+
+class AclDeconvExecutor : public DeconvExecutor {
+public:
+    explicit AclDeconvExecutor(const ExecutorContext::CPtr context);
+    bool init(const DeconvAttrs& deconvAttrs,
+              const std::vector<MemoryDescPtr>& srcDescs,
+              const std::vector<MemoryDescPtr>& dstDescs,
+              const dnnl::primitive_attr &attr) override;
+    void exec(const std::vector<MemoryCPtr>& src,
+              const std::vector<MemoryPtr>& dst,
+              const void *post_ops_data_) override;
+
+    impl_desc_type getImplType() const override {
+        return implType;
+    }
+
+private:
+    DeconvAttrs deconvAttrs;
+    impl_desc_type implType = impl_desc_type::acl;
+
+    arm_compute::Tensor srcTensor;
+    arm_compute::Tensor weiTensor;
+    arm_compute::Tensor biasTensor;
+    arm_compute::Tensor dstTensor;
+    std::unique_ptr<arm_compute::NEDeconvolutionLayer> deconv = nullptr;
+
+    std::vector<float> weiBuffer;
+};
+
+class AclDeconvExecutorBuilder : public DeconvExecutorBuilder {
+public:
+    static bool customIsSupported(const DeconvAttrs& deconvAttrs,
+                                  const std::vector<MemoryDescPtr>& srcDescs,
+                                  const std::vector<MemoryDescPtr>& dstDescs);
+
+    bool isSupported(const DeconvAttrs& deconvAttrs,
+                     const std::vector<MemoryDescPtr>& srcDescs,
+                     const std::vector<MemoryDescPtr>& dstDescs) const override {
+        return customIsSupported(deconvAttrs, srcDescs, dstDescs);
+    }
+
+    DeconvExecutorPtr makeExecutor(const ExecutorContext::CPtr context) const override {
+        return std::make_shared<AclDeconvExecutor>(context);
+    }
+
+private:
+    static bool validate_deconvolution_output_dimensions(unsigned int in_width, unsigned int in_height,
+                                                         unsigned int kernel_width, unsigned int kernel_height,
+                                                         const arm_compute::PadStrideInfo &pad_stride_info);
+};
+
+}   // namespace intel_cpu
+}   // namespace ov
@@ -0,0 +1,13 @@
+// Copyright (C) 2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "deconv.hpp"
+
+namespace ov {
+namespace intel_cpu {
+
+using namespace InferenceEngine;
+
+}   // namespace intel_cpu
+}   // namespace ov