[ARM CPU] Common fixes for fp16 (openvinotoolkit#20504)
allnes authored Oct 18, 2023
1 parent f723f90 commit 3b2ad48
Showing 10 changed files with 45 additions and 29 deletions.
4 changes: 2 additions & 2 deletions src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.cpp
@@ -56,11 +56,11 @@ void CTCGreedyDecoder::initSupportedPrimitiveDescriptors() {
         return;

     Precision inDataPrecision = getOriginalInputPrecisionAtPort(DATA_INDEX);
-    if (inDataPrecision != Precision::FP32 && inDataPrecision != Precision::BF16)
+    if (!one_of(inDataPrecision, Precision::FP32, Precision::BF16, Precision::FP16))
         IE_THROW() << errorPrefix << "has unsupported 'data' input precision: " << inDataPrecision;

     Precision seqLenPrecision = getOriginalInputPrecisionAtPort(SEQUENCE_LENGTH_INDEX);
-    if (seqLenPrecision != Precision::FP32 && seqLenPrecision != Precision::BF16)
+    if (!one_of(seqLenPrecision, Precision::FP32, Precision::BF16, Precision::FP16))
         IE_THROW() << errorPrefix << "has unsupported 'sequence_length' input precision: " << seqLenPrecision;

     addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32},
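Most hunks in this commit swap chained `!=` comparisons for the plugin's variadic `one_of` helper while adding FP16 to the accepted set. For reference, a minimal sketch of what `one_of` does (the real helper lives in the intel_cpu utility headers; this C++17 fold-expression version is an illustration, not the commit's code):

    // Illustrative sketch: true when `val` compares equal to any of `items`.
    template <typename T, typename... Ts>
    constexpr bool one_of(const T& val, const Ts&... items) {
        return ((val == items) || ...);
    }

Written this way, extending a precision check to FP16 is a one-argument change rather than another `&&` clause.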
@@ -55,7 +55,7 @@ void CTCGreedyDecoderSeqLen::initSupportedPrimitiveDescriptors() {
         return;

     Precision inDataPrecision = getOriginalInputPrecisionAtPort(DATA_INDEX);
-    if (inDataPrecision != Precision::FP32 && inDataPrecision != Precision::BF16)
+    if (!one_of(inDataPrecision, Precision::FP32, Precision::BF16, Precision::FP16))
         IE_THROW() << errorPrefix << "has unsupported 'data' input precision: " << inDataPrecision;

     Precision seqLenPrecision = getOriginalInputPrecisionAtPort(SEQUENCE_LENGTH_INDEX);
2 changes: 1 addition & 1 deletion src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp
@@ -626,7 +626,7 @@ void NonMaxSuppression::initSupportedPrimitiveDescriptors() {
     if (!supportedPrimitiveDescriptors.empty())
         return;

-    const std::vector<Precision> supportedFloatPrecision = {Precision::FP32, Precision::BF16};
+    const std::vector<Precision> supportedFloatPrecision = {Precision::FP32, Precision::BF16, Precision::FP16};
     const std::vector<Precision> supportedIntOutputPrecision = {Precision::I32, Precision::I64};

     checkPrecision(getOriginalInputPrecisionAtPort(NMS_BOXES), supportedFloatPrecision, "boxes", inType);
3 changes: 2 additions & 1 deletion src/plugins/intel_cpu/src/nodes/non_zero.cpp
@@ -57,7 +57,7 @@ void NonZero::initSupportedPrimitiveDescriptors() {
         return;

     const auto &inPrc = getOriginalInputPrecisionAtPort(0);
-    if (!one_of(inPrc, Precision::FP32, Precision::BF16, Precision::I32, Precision::U32, Precision::I8, Precision::U8)) {
+    if (!one_of(inPrc, Precision::FP32, Precision::BF16, Precision::FP16, Precision::I32, Precision::U32, Precision::I8, Precision::U8)) {
         IE_THROW() << "Can't create primitive descriptor for NonZero layer with name: " << getName() << " doesn't support "
                    << inPrc.name() << " precision on 0 port";
     }
@@ -123,6 +123,7 @@ void NonZero::execute(dnnl::stream strm) {
     OV_SWITCH(intel_cpu, NonZeroExecute, ctx, inputPrec,
               OV_CASE(Precision::FP32, float),
               OV_CASE(Precision::BF16, bfloat16_t),
+              OV_CASE(Precision::FP16, float16),
               OV_CASE(Precision::I32, int),
               OV_CASE(Precision::U32, uint32_t),
               OV_CASE(Precision::I8, int8_t),
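`OV_SWITCH`/`OV_CASE` implement a precision-to-type dispatch: the runtime precision tag picks which instantiation of the templated executor runs. A hand-expanded sketch of what the dispatch above amounts to (an illustrative assumption about the macro expansion; `NonZeroExecute` and `ctx` are names from the diff):

    // Hypothetical expansion of the OV_SWITCH above.
    if (inputPrec == Precision::FP32)       NonZeroExecute<float>()(ctx);
    else if (inputPrec == Precision::BF16)  NonZeroExecute<bfloat16_t>()(ctx);
    else if (inputPrec == Precision::FP16)  NonZeroExecute<float16>()(ctx);
    else if (inputPrec == Precision::I32)   NonZeroExecute<int>()(ctx);
    // ...the remaining integer cases follow the same pattern.

Without the new FP16 case, a half-precision input would pass the precision check above but fall through the dispatch at execution time.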
11 changes: 8 additions & 3 deletions src/plugins/intel_cpu/src/nodes/normalize.cpp
@@ -796,10 +796,14 @@ void NormalizeL2::initSupportedPrimitiveDescriptors() {
         inputPrecision = outputPrecision = Precision::BF16;
     }

+    if (one_of(Precision::FP16, inputPrecision, outputPrecision) && mayiuse(cpu::x64::sse41)) {
+        inputPrecision = outputPrecision = Precision::FP32;
+    }
+
-    if (!one_of(inputPrecision, Precision::FP32, Precision::BF16, Precision::I8, Precision::U8)) {
+    if (!one_of(inputPrecision, Precision::FP32, Precision::BF16, Precision::FP16, Precision::I8, Precision::U8)) {
         THROW_ERROR << "has unsupported input precision: " << inputPrecision;
     }
-    if (!one_of(outputPrecision, Precision::FP32, Precision::BF16, Precision::I8, Precision::U8)) {
+    if (!one_of(outputPrecision, Precision::FP32, Precision::BF16, Precision::FP16, Precision::I8, Precision::U8)) {
         THROW_ERROR << "has unsupported output precision: " << outputPrecision;
     }

@@ -1483,7 +1487,8 @@ std::shared_ptr<NormalizeL2::NormalizeL2Executor> NormalizeL2::NormalizeL2Execut
                   OV_CASE2(Precision::U8, Precision::FP32, uint8_t, float),
                   OV_CASE2(Precision::I8, Precision::FP32, int8_t, float),
                   OV_CASE2(Precision::FP32, Precision::FP32, float, float),
-                  OV_CASE2(Precision::BF16, Precision::BF16, bfloat16_t, bfloat16_t));
+                  OV_CASE2(Precision::BF16, Precision::BF16, bfloat16_t, bfloat16_t),
+                  OV_CASE2(Precision::FP16, Precision::FP16, float16_t, float16_t));

     return ctx.executor;
 }
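The NormalizeL2 change is two-sided: on x64 builds where `mayiuse(cpu::x64::sse41)` holds, an fp16 input/output precision is widened to fp32 (the jit kernels there have no fp16 support), while on ARM the precision survives and lands in the new `float16_t` executor case. A condensed sketch of the resulting selection, using a hypothetical helper name:

    // Hypothetical summary of the precision resolution above.
    Precision resolveNormalizePrecision(Precision prc, bool x64_sse41) {
        if (prc == Precision::FP16 && x64_sse41)
            return Precision::FP32;  // x64 jit path: widen fp16 to fp32
        return prc;                  // ARM path: keep fp16, use the fp16 executor
    }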
7 changes: 6 additions & 1 deletion src/plugins/intel_cpu/src/nodes/rnn.cpp
@@ -133,6 +133,7 @@ inline bool haveAttention(const dnnl::algorithm& alg) {
 const std::map<memory::data_type, memory::data_type> RNN::weightsByinputDataType {
     // layer data type        weights data type
     {memory::data_type::f32,  memory::data_type::f32},
+    {memory::data_type::f16,  memory::data_type::f16},
     {memory::data_type::bf16, memory::data_type::bf16},
     {memory::data_type::u8,   memory::data_type::s8},
     {memory::data_type::s8,   memory::data_type::s8},
@@ -505,6 +506,10 @@ void RNN::configurePortDataTypes() {
     if (one_of(memory::data_type::bf16, inDataTypes[xIdx], inDataTypes[hIdx]))
         inDataTypes[xIdx] = outDataTypes[yIdx] = outDataTypes[hoIdx] = inDataTypes[hIdx] = memory::data_type::bf16; // required by oneDNN.

+    if (one_of(memory::data_type::f16, inDataTypes[xIdx], inDataTypes[hIdx]))
+        // oneDNN doesn't have an fp16 RNN instance, so fall back to f32.
+        inDataTypes[xIdx] = outDataTypes[yIdx] = outDataTypes[hoIdx] = inDataTypes[hIdx] = memory::data_type::f32;
+
     if (outDataTypes[yIdx] == memory::data_type::bf16 && one_of(inDataTypes[xIdx], memory::data_type::s8, memory::data_type::u8))
         outDataTypes[yIdx] = memory::data_type::f32; // oneDNN does not support bf16 output precision for quantized rnn primitive yet
 }
@@ -882,7 +887,7 @@ void RNN::copyWeightsData() {
     }

     const auto& dataType = inDataTypes[xIdx];
-    if (dataType == memory::data_type::bf16) {
+    if (one_of(dataType, memory::data_type::bf16, memory::data_type::f16)) {
         fillWeights<uint16_t>(gate_map, wIdx, rIdx);
     } else if (dataType == memory::data_type::f32) {
         // WA To avoid different weights layer and iter formats in FP32 case
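Two separate fp16 decisions coexist in rnn.cpp: port data types fall back to f32 because oneDNN provides no fp16 RNN primitive here, while weight repacking reuses the bf16 path for f16. The reuse is sound because `fillWeights<uint16_t>` moves 16-bit words without interpreting them, so one instantiation serves both half-precision formats; a minimal sketch of the underlying idea (an illustration, not the commit's code):

    #include <cstdint>
    #include <cstring>

    // Bit-preserving copy: correct for bf16 and f16 alike, since both are
    // 16-bit storage formats and no numeric conversion is intended here.
    void copy_halfwords(void* dst, const void* src, std::size_t count) {
        std::memcpy(dst, src, count * sizeof(std::uint16_t));
    }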
@@ -173,13 +173,18 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigAffinityCore) {
     ASSERT_EQ(false, value);
 }

+#if defined(OV_CPU_ARM_ENABLE_FP16)
+const auto expected_precision_for_performance_mode = ov::element::f16;
+#else
+const auto expected_precision_for_performance_mode = InferenceEngine::with_cpu_x86_bfloat16() ? ov::element::bf16 : ov::element::f32;
+#endif
+
 TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigHintInferencePrecision) {
     ov::Core ie;
     auto value = ov::element::f32;
-    const auto precision = InferenceEngine::with_cpu_x86_bfloat16() ? ov::element::bf16 : ov::element::f32;

     ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::inference_precision));
-    ASSERT_EQ(precision, value);
+    ASSERT_EQ(expected_precision_for_performance_mode, value);

     const auto forcedPrecision = ov::element::f32;

@@ -210,8 +215,6 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigEnableProfiling) {
     ASSERT_EQ(enableProfiling, value);
 }

-const auto expected_precision_for_performance_mode = InferenceEngine::with_cpu_x86_bfloat16() ? ov::element::bf16 : ov::element::f32;
-
 const auto bf16_if_can_be_emulated = InferenceEngine::with_cpu_x86_avx512_core() ? ov::element::bf16 : ov::element::f32;
 using ExpectedModeAndType = std::pair<ov::hint::ExecutionMode, ov::element::Type>;
@@ -11,11 +11,12 @@ using namespace ov::test::behavior;
 namespace {

 const std::vector<ov::AnyMap> configs = {
-    {}
+    {{ov::hint::inference_precision.name(), ov::element::f32}}
 };

 const std::vector<ov::AnyMap> HeteroConfigs = {
-    {ov::device::priorities(ov::test::utils::DEVICE_CPU)}
+    {{ov::hint::inference_precision.name(), ov::element::f32},
+     {ov::device::priorities(ov::test::utils::DEVICE_CPU)}},
 };

 std::shared_ptr<ngraph::Function> getFunction1() {
@@ -10,11 +10,12 @@ using namespace ov::test::behavior;
 namespace {

 const std::vector<ov::AnyMap> configs = {
-    {}
+    {{ov::hint::inference_precision.name(), ov::element::f32}}
 };

 const std::vector<ov::AnyMap> HeteroConfigs = {
-    {ov::device::priorities(ov::test::utils::DEVICE_CPU)}
+    {{ov::hint::inference_precision.name(), ov::element::f32},
+     {ov::device::priorities(ov::test::utils::DEVICE_CPU)}},
 };

 INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, OVIterationChaining,
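Both test-instance files pin `ov::hint::inference_precision` to f32: with OV_CPU_ARM_ENABLE_FP16 the CPU plugin now defaults to f16 on ARM (see the property test above), and these behavior tests validate against f32 references. A minimal usage sketch of the same property from application code (standard ov::Core API; the surrounding program is an assumption):

    #include <iostream>
    #include <openvino/openvino.hpp>

    int main() {
        ov::Core core;
        // Query the device default; ARM fp16 builds report f16 here.
        auto prec = core.get_property("CPU", ov::hint::inference_precision);
        std::cout << "default inference precision: " << prec << std::endl;
        // Pin f32 when exact f32 numerics are required.
        core.set_property("CPU", ov::hint::inference_precision(ov::element::f32));
        return 0;
    }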
@@ -190,18 +190,6 @@ std::vector<std::string> disabledTestPatterns() {
         // Issue: 122094
         R"(smoke_Interpolate_Basic_Down_Sample_Tail/InterpolateLayerTest.Inference.*(asymmetric|align_corners).*f16.*)",
     };
-#if defined(__APPLE__) && defined(OPENVINO_ARCH_ARM64)
-    // Issue: 120950
-    retVector.emplace_back(R"(.*smoke_TensorIteratorCommon/TensorIteratorTest.Inference.*_modelType=f16_targetDevice=CPU.*)");
-    retVector.emplace_back(R"(.*smoke_CtcGreedyDecoderBasic/CTCGreedyDecoderLayerTest.Inference.*netPRC=f16.*trgDev=CPU.*)");
-    retVector.emplace_back(R"(.*CTCGreedyDecoderSeqLenLayerTest.Inference.*dataPRC=f16.*trgDev=CPU.*)");
-    // Issue: 122177
-    retVector.emplace_back(R"(.*smoke_LSTMCellCommon/LSTMCellTest.Inference.*_modelType=f16.*)");
-    retVector.emplace_back(R"(.*smoke_LSTMSequenceCommonZeroClip/LSTMSequenceTest.Inference.*_modelType=f16.*)");
-    // Issue 122699
-    retVector.emplace_back(R"(.*smoke_nonzero/NonZeroLayerTest.Inference.*inPRC=f16.*)");
-    retVector.emplace_back(R"(.*NormalizeL2LayerTest.Inference.*netPRC=f16.*)");
-#endif

 #if defined(OPENVINO_ARCH_X86)
     retVector.emplace_back(R"(.*DetectionOutputLayerTest.*)");
@@ -230,6 +218,18 @@ std::vector<std::string> disabledTestPatterns() {
     retVector.emplace_back(R"(smoke_NegativeQuantizedMatMulMultiplyFusion.*)");
     // int8 specific
     retVector.emplace_back(R"(smoke_Quantized.*)");
+
+#if defined(OV_CPU_ARM_ENABLE_FP16)
+    // Issue: 123019
+    retVector.emplace_back(R"(smoke_AvgPool_ExplicitPad_CeilRounding.*modelType=f16.*)");
+    retVector.emplace_back(R"(smoke_AvgPool_ExplicitPad_FloorRounding_5Dinput/PoolingLayerTest.*modelType=f16.*)");
+    retVector.emplace_back(R"(smoke_AvgPool_SameUpperPad_FloorRounding_5Dinput/PoolingLayerTest.*modelType=f16.*)");
+    retVector.emplace_back(R"(smoke_AvgPool_SameLowerPad_CeilRounding_5Dinput/PoolingLayerTest.*modelType=f16.*)");
+    retVector.emplace_back(R"(smoke_CompareWithRefs_Mvn.*INFERENCE_PRECISION_HINT=f16.*)");
+    retVector.emplace_back(R"(smoke_staticShapes4D.*INFERENCE_PRECISION_HINT=f16.*)");
+    retVector.emplace_back(R"(smoke_dynamicShapes4D.*INFERENCE_PRECISION_HINT=f16.*)");
+#endif
+
 #endif

 #if defined(OPENVINO_ARCH_ARM)
