// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "acl_deconv.hpp"
#include "ie_parallel.hpp"

namespace ov {
namespace intel_cpu {

using namespace arm_compute;

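// Builds the ACL TensorInfo descriptors and PadStrideInfo for a deconvolution from the OV memory
// descriptors: swaps the first two weight dims (OV [I, O, H, W] -> ACL [O, I, H, W]) and extracts
// per-axis padding and stride values.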
ACLDeconvTensorInfo getACLDeconvTensorInfo(const DeconvAttrs& deconvAttrs,
                                           const std::vector<MemoryDescPtr>& srcDescs,
                                           const std::vector<MemoryDescPtr>& dstDescs) {
    auto srcDims = srcDescs[0]->getShape().getDims();
    auto weiDims = srcDescs[1]->getShape().getDims();
    // swap the input and output channel dimensions to align with ACL:
    // the weights tensor shape is changed because ACL expects an [O, I, H, W] tensor while OV uses [I, O, H, W]
    std::swap(weiDims[0], weiDims[1]);
    auto dstDims = dstDescs[0]->getShape().getDims();

    VectorDims biasDims;
    TensorInfo biasTensorInfo;

    if (deconvAttrs.withBiasesParam) {
        biasDims = srcDescs[2]->getShape().getStaticDims();
        biasTensorInfo = TensorInfo(shapeCast(biasDims), 1,
                                    precisionToAclDataType(srcDescs[2]->getPrecision()),
                                    getAclDataLayoutByMemoryDesc(srcDescs[2]));
    }

    TensorInfo srcTensorInfo = TensorInfo(shapeCast(srcDims), 1,
                                          precisionToAclDataType(srcDescs[0]->getPrecision()),
                                          getAclDataLayoutByMemoryDesc(srcDescs[0]));
    TensorInfo weiTensorInfo = TensorInfo(shapeCast(weiDims), 1,
                                          precisionToAclDataType(srcDescs[1]->getPrecision()),
                                          getAclDataLayoutByMemoryDesc(srcDescs[1]));
    TensorInfo dstTensorInfo = TensorInfo(shapeCast(dstDims), 1,
                                          precisionToAclDataType(dstDescs[0]->getPrecision()),
                                          getAclDataLayoutByMemoryDesc(dstDescs[0]));

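    // OV stores padding and stride as [y] or [y, x]; ACL's PadStrideInfo takes explicit x/y values,
    // so in the 1D case the single value is reused for both axes.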
    unsigned int pad_l = (deconvAttrs.paddingL.size() > 1) ? static_cast<unsigned int>(deconvAttrs.paddingL.at(1))
                                                           : static_cast<unsigned int>(deconvAttrs.paddingL.at(0));
    unsigned int pad_r = (deconvAttrs.paddingR.size() > 1) ? static_cast<unsigned int>(deconvAttrs.paddingR.at(1))
                                                           : static_cast<unsigned int>(deconvAttrs.paddingR.at(0));
    unsigned int pad_t = static_cast<unsigned int>(deconvAttrs.paddingL.at(0));
    unsigned int pad_b = static_cast<unsigned int>(deconvAttrs.paddingR.at(0));
    unsigned int stride_x = (deconvAttrs.stride.size() > 1) ? deconvAttrs.stride.at(1) : deconvAttrs.stride.at(0);
    unsigned int stride_y = deconvAttrs.stride.at(0);
    PadStrideInfo deconv_info(stride_x, stride_y, pad_l, pad_r, pad_t, pad_b, DimensionRoundingType::FLOOR);

    return ACLDeconvTensorInfo{srcTensorInfo, weiTensorInfo, biasTensorInfo, dstTensorInfo, deconv_info};
}

AclDeconvExecutor::AclDeconvExecutor(const ExecutorContext::CPtr context) : DeconvExecutor(context) {}

bool AclDeconvExecutor::init(const DeconvAttrs& deconvAttrs,
                             const std::vector<MemoryDescPtr>& srcDescs,
                             const std::vector<MemoryDescPtr>& dstDescs,
                             const dnnl::primitive_attr &attr) {
    this->deconvAttrs = deconvAttrs;
    ACLDeconvTensorInfo aclDeconvTensorInfo = getACLDeconvTensorInfo(deconvAttrs, srcDescs, dstDescs);
    TensorInfo srcTensorInfo = aclDeconvTensorInfo.srcTensorInfo;
    TensorInfo weiTensorInfo = aclDeconvTensorInfo.weiTensorInfo;
    TensorInfo biasTensorInfo = aclDeconvTensorInfo.biasTensorInfo;
    TensorInfo dstTensorInfo = aclDeconvTensorInfo.dstTensorInfo;
    PadStrideInfo deconv_info = aclDeconvTensorInfo.deconv_info;

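    // Ask ACL up front whether this tensor/stride/padding combination is supported
    // before configuring the layer.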
    arm_compute::Status status = arm_compute::NEDeconvolutionLayer::validate(&srcTensorInfo,
                                                                             &weiTensorInfo,
                                                                             deconvAttrs.withBiasesParam ? &biasTensorInfo : nullptr,
                                                                             &dstTensorInfo,
                                                                             deconv_info);
    if (!status) {
        DEBUG_LOG("NEDeconvolutionLayer validation failed: ", status.error_description());
        return false;
    }

    srcTensor.allocator()->init(srcTensorInfo);
    weiTensor.allocator()->init(weiTensorInfo);
    dstTensor.allocator()->init(dstTensorInfo);
    if (deconvAttrs.withBiasesParam)
        biasTensor.allocator()->init(biasTensorInfo);

    deconv = std::make_unique<arm_compute::NEDeconvolutionLayer>();
    deconv->configure(&srcTensor, &weiTensor, deconvAttrs.withBiasesParam ? &biasTensor : nullptr, &dstTensor, deconv_info);

    // weights tensor shape is changed because ACL expects [O, I, H, W] tensor while OV uses [I, O, H, W] tensor
    weiBuffer = std::vector<float>(srcDescs[1]->getShape().getStaticDims()[0] *
                                   srcDescs[1]->getShape().getStaticDims()[1] *
                                   srcDescs[1]->getShape().getStaticDims()[2] *
                                   srcDescs[1]->getShape().getStaticDims()[3]);
    return true;
}

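// Transposes the first two dimensions of a 4D tensor (permutation 1, 0, 2, 3), i.e. repacks
// the OV [I, O, H, W] weights into the [O, I, H, W] order that ACL expects.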
static void transpose_to_1023(const MemoryCPtr& srcMemPtr, std::vector<float>& dst_data) {
    const auto src_data = reinterpret_cast<float*>(srcMemPtr->getData());

    const int DIM0 = srcMemPtr->getStaticDims()[0];
    const int DIM1 = srcMemPtr->getStaticDims()[1];
    const int DIM2 = srcMemPtr->getStaticDims()[2];
    const int DIM3 = srcMemPtr->getStaticDims()[3];

    parallel_for3d(DIM0, DIM1, DIM2, [&](const int dim0, const int dim1, const int dim2) {
        for (int dim3 = 0; dim3 < DIM3; ++dim3) {
            const int src_off = dim0 * DIM1 * DIM2 * DIM3 +
                                dim1 * DIM2 * DIM3 +
                                dim2 * DIM3 +
                                dim3;
            const int dst_off = dim1 * DIM0 * DIM2 * DIM3 +
                                dim0 * DIM2 * DIM3 +
                                dim2 * DIM3 +
                                dim3;

            dst_data[dst_off] = src_data[src_off];
        }
    });
}

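// import_memory() wraps the caller-provided buffers without copying, so the only real data
// movement per call is the weights transpose into weiBuffer.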
void AclDeconvExecutor::exec(const std::vector<MemoryCPtr>& src, const std::vector<MemoryPtr>& dst, const void *post_ops_data_) {
    // TODO: Remove transpose from exec
    transpose_to_1023(src[1], weiBuffer);

    srcTensor.allocator()->import_memory(src[0]->getData());
    dstTensor.allocator()->import_memory(dst[0]->getData());
    weiTensor.allocator()->import_memory(weiBuffer.data());
    if (deconvAttrs.withBiasesParam)
        biasTensor.allocator()->import_memory(src[2]->getData());
    deconv->run();

    srcTensor.allocator()->free();
    dstTensor.allocator()->free();
    weiTensor.allocator()->free();
    if (deconvAttrs.withBiasesParam)
        biasTensor.allocator()->free();
}

bool AclDeconvExecutorBuilder::customIsSupported(const DeconvAttrs &deconvAttrs,
                                                 const std::vector<MemoryDescPtr> &srcDescs,
                                                 const std::vector<MemoryDescPtr> &dstDescs) {
    if ((srcDescs[0]->getShape().getDims().size() != 3 && srcDescs[0]->getShape().getDims().size() != 4) ||
        dstDescs[0]->getShape().getDims().size() != srcDescs[0]->getShape().getDims().size() ||
        srcDescs[1]->getShape().getDims().size() != 4) {
        DEBUG_LOG("AclDeconvExecutor does not support dimension:",
                  " src[0]=", srcDescs[0]->getShape().getDims().size(),
                  " src[1]=", srcDescs[1]->getShape().getDims().size(),
                  " dst[0]=", dstDescs[0]->getShape().getDims().size());
        return false;
    }

    // TODO: Ticket CVS-114087 - enable FP16 once the FP16 scope has been verified
    if (!(one_of(srcDescs[0]->getPrecision(), /*InferenceEngine::Precision::FP16, */InferenceEngine::Precision::FP32) &&
          srcDescs[0]->getPrecision() == srcDescs[1]->getPrecision() &&
          srcDescs[1]->getPrecision() == dstDescs[0]->getPrecision())) {
        DEBUG_LOG("AclDeconvExecutor does not support precisions:",
                  " src[0]=", srcDescs[0]->getPrecision(),
                  " src[1]=", srcDescs[1]->getPrecision(),
                  " dst[0]=", dstDescs[0]->getPrecision());
        return false;
    }

    if (deconvAttrs.withBiasesParam && srcDescs[2]->getPrecision() != srcDescs[0]->getPrecision()) {
        DEBUG_LOG("AclDeconvExecutor does not support precisions:",
                  " src[2]=", srcDescs[2]->getPrecision());
        return false;
    }

    if (!(srcDescs[0]->hasLayoutType(LayoutType::ncsp) &&
          srcDescs[1]->hasLayoutType(LayoutType::ncsp) &&
          dstDescs[0]->hasLayoutType(LayoutType::ncsp)) &&
        !(srcDescs[0]->hasLayoutType(LayoutType::nspc) &&
          srcDescs[1]->hasLayoutType(LayoutType::nspc) &&
          dstDescs[0]->hasLayoutType(LayoutType::nspc))) {
        DEBUG_LOG("AclDeconvExecutor does not support layouts:",
                  " src[0]=", srcDescs[0]->serializeFormat(),
                  " src[1]=", srcDescs[1]->serializeFormat(),
                  " dst=", dstDescs[0]->serializeFormat());
        return false;
    }

    if (deconvAttrs.withBiasesParam &&
        !(srcDescs[2]->hasLayoutType(LayoutType::ncsp)) &&
        !(srcDescs[2]->hasLayoutType(LayoutType::nspc))) {
        DEBUG_LOG("AclDeconvExecutor does not support layouts:",
                  " src[0]=", srcDescs[0]->serializeFormat(),
                  " src[1]=", srcDescs[1]->serializeFormat(),
                  " src[2]=", srcDescs[2]->serializeFormat(),
                  " dst=", dstDescs[0]->serializeFormat());
        return false;
    }

    ACLDeconvTensorInfo aclDeconvTensorInfo = getACLDeconvTensorInfo(deconvAttrs, srcDescs, dstDescs);
    TensorInfo srcTensorInfo = aclDeconvTensorInfo.srcTensorInfo;
    TensorInfo weiTensorInfo = aclDeconvTensorInfo.weiTensorInfo;
    TensorInfo biasTensorInfo = aclDeconvTensorInfo.biasTensorInfo;
    TensorInfo dstTensorInfo = aclDeconvTensorInfo.dstTensorInfo;
    PadStrideInfo deconv_info = aclDeconvTensorInfo.deconv_info;

    unsigned int kernel_x = (deconvAttrs.kernel.size() > 1) ? deconvAttrs.kernel.at(1) : deconvAttrs.kernel.at(0);
    unsigned int kernel_y = deconvAttrs.kernel.at(0);

    // With strides of 8 or more, up-sampling in the ACL deconvolution layer is slower than the reference implementation
    if (deconv_info.stride().first >= 8 || deconv_info.stride().second >= 8) return false;

    unsigned int dilation_x = (deconvAttrs.dilation.size() > 1) ? deconvAttrs.dilation.at(1) : deconvAttrs.dilation.at(0);
    unsigned int dilation_y = deconvAttrs.dilation.at(0);
    if (!one_of(dilation_x, static_cast<unsigned int>(0), static_cast<unsigned int>(1)) ||
        !one_of(dilation_y, static_cast<unsigned int>(0), static_cast<unsigned int>(1))) return false;

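    // Spatial H/W are read from positions 2/3 for the ncsp (planar) layout and 1/2 for the nspc layout.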
    size_t in_h = srcDescs[0]->hasLayoutType(LayoutType::ncsp) ? srcDescs[0]->getShape().getDims()[2]
                                                               : srcDescs[0]->getShape().getDims()[1];
    size_t in_w = srcDescs[0]->hasLayoutType(LayoutType::ncsp) ? srcDescs[0]->getShape().getDims()[3]
                                                               : srcDescs[0]->getShape().getDims()[2];

    // The validate() path has a bug (https://github.com/ARM-software/ComputeLibrary/issues/1061): it raises an error
    // exception instead of returning a status, so the checks from deconvolution_output_dimensions() are replicated
    // here to get correct validation.
    // TODO: remove after fix
    if (!validate_deconvolution_output_dimensions(in_w, in_h, kernel_x, kernel_y, deconv_info)) {
        DEBUG_LOG("NEDeconvolutionLayer arm_compute::deconvolution_output_dimensions failed");
        return false;
    }

    arm_compute::Status status = arm_compute::NEDeconvolutionLayer::validate(&srcTensorInfo,
                                                                             &weiTensorInfo,
                                                                             deconvAttrs.withBiasesParam ? &biasTensorInfo : nullptr,
                                                                             &dstTensorInfo,
                                                                             deconv_info);
    if (!status) {
        DEBUG_LOG("NEDeconvolutionLayer validation failed: ", status.error_description());
        return false;
    }

    return true;
}

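// Mirrors the precondition checks of arm_compute::deconvolution_output_dimensions without raising an
// error, so unsupported geometries can be rejected gracefully. Returns true when the output dimensions
// would be valid.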
bool AclDeconvExecutorBuilder::validate_deconvolution_output_dimensions(unsigned int in_width, unsigned int in_height,
                                                                        unsigned int kernel_width,
                                                                        unsigned int kernel_height,
                                                                        const PadStrideInfo &pad_stride_info) {
    const unsigned int pad_left   = pad_stride_info.pad_left();
    const unsigned int pad_top    = pad_stride_info.pad_top();
    const unsigned int pad_right  = pad_stride_info.pad_right();
    const unsigned int pad_bottom = pad_stride_info.pad_bottom();
    const unsigned int stride_x   = pad_stride_info.stride().first;
    const unsigned int stride_y   = pad_stride_info.stride().second;

    if ((in_width < 1 || in_height < 1) ||
        (((in_width - 1) * stride_x + kernel_width) < (pad_left + pad_right)) ||
        (((in_height - 1) * stride_y + kernel_height) < (pad_top + pad_bottom))) {
        return false;
    }
    return true;
}
} // namespace intel_cpu
} // namespace ov