Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ARM CPU] ACL TBB scheduler #17445

Merged
merged 25 commits into from
Jul 19, 2023
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
8e81a38
tbb experiment - old version
allnes May 9, 2023
5a81547
enable throughput mode
allnes May 9, 2023
04eb077
Merge branch 'master' of https://github.com/openvinotoolkit/openvino …
allnes May 16, 2023
a25f2cc
Merge branch 'master' of https://github.com/openvinotoolkit/openvino …
allnes May 19, 2023
b527a92
Merge branch 'master' of https://github.com/openvinotoolkit/openvino …
allnes May 19, 2023
4a585b5
Merge branch 'master' of https://github.com/openvinotoolkit/openvino …
allnes May 22, 2023
e9316b5
Merge branch 'master' of https://github.com/openvinotoolkit/openvino …
allnes May 23, 2023
69efd08
Merge branch 'master' of https://github.com/openvinotoolkit/openvino …
allnes May 23, 2023
607f56c
Merge branch 'master' into an/tbb_scheduler_exp
allnes May 23, 2023
871dd8d
Merge branch 'master' of https://github.com/openvinotoolkit/openvino …
allnes May 25, 2023
c48bd28
Merge branch 'master' of https://github.com/openvinotoolkit/openvino …
allnes Jun 1, 2023
7f20bf8
acl scheduler trnasform
allnes Jun 5, 2023
fa19a46
Merge branch 'master' of https://github.com/openvinotoolkit/openvino …
allnes Jun 5, 2023
17926b5
Merge branch 'master' of https://github.com/openvinotoolkit/openvino …
allnes Jun 6, 2023
f455b09
new version tbb scheduler
allnes Jun 7, 2023
8280284
Merge remote-tracking branch 'origin/an/tbb_scheduler_exp' into an/tb…
allnes Jun 7, 2023
1dd7dac
Merge branch 'master' into an/tbb_scheduler_exp
allnes Jul 3, 2023
d10234b
update scheduling
allnes Jul 3, 2023
50b3bc8
Merge branch 'master' into an/tbb_scheduler_exp
allnes Jul 3, 2023
d60b8f0
enable multi-streaming
allnes Jul 3, 2023
4f03280
Merge branch 'master' into an/tbb_scheduler_exp
allnes Jul 3, 2023
44d2b16
Merge branch 'master' into an/tbb_scheduler_exp
allnes Jul 12, 2023
9d6eee7
change
allnes Jul 12, 2023
1f3a64d
Merge branch 'master' into an/tbb_scheduler_exp
allnes Jul 18, 2023
369ebc0
Merge branch 'master' into an/tbb_scheduler_exp
allnes Jul 18, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions src/plugins/intel_cpu/src/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -256,12 +256,6 @@ void Config::readProperties(const std::map<std::string, std::string> &prop) {
streamExecutorConfig._streams_changed = true;
}

#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)
// TODO: multi-stream execution has functional issues on ARM target
streamExecutorConfig._streams = 1;
streamExecutorConfig._streams_changed = true;
#endif

CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties());
updateProperties();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Copyright (C) 2020-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "acl_ie_scheduler.hpp"

#include "arm_compute/core/CPP/ICPPKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include <ie_parallel.hpp>

namespace ov {
namespace intel_cpu {

using namespace arm_compute;

// Explicitly defaulted out-of-line constructor: no scheduler state is set up here.
ACLScheduler::ACLScheduler() = default;

// Reports the thread count returned by parallel_get_num_threads();
// no value is cached inside the scheduler.
unsigned int ACLScheduler::num_threads() const {
    const auto thread_count = parallel_get_num_threads();
    return thread_count;
}

// Currently a no-op: the requested thread count is ignored, and num_threads()
// keeps reporting parallel_get_num_threads().
// TODO(review): honour the requested value once thread-count control is aligned
// with multi-stream execution - confirm the intended design before implementing.
void ACLScheduler::set_num_threads(unsigned int num_threads) {}
Copy link
Contributor

@dmitry-gorokhov dmitry-gorokhov Jul 18, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method has to be implemented properly. You can keep an internal num_threads field initialized with parallel_get_num_threads() as its default value.
Then run_workloads can be implemented using InferenceEngine::parallel_nt, which takes the number of threads as a parameter.
Anyway, this needs to be aligned with the streams support activity to provide proper behavior inside a TBB arena. This might not be trivial, given that ACL has only a global scheduler instance.
Let's follow up later.


// Schedules `kernel` over the kernel's own window.
// This entry point receives no tensors, so an empty pack is forwarded.
void ACLScheduler::schedule(ICPPKernel *kernel, const Hints &hints) {
    ITensorPack empty_pack;
    const Window &kernel_window = kernel->window();
    schedule_common(kernel, hints, kernel_window, empty_pack);
}

// Schedules `kernel` over the caller-supplied execution window.
//
// kernel  - kernel to execute
// hints   - scheduling hints forwarded unchanged to schedule_common()
// window  - execution window chosen by the caller; it is forwarded as-is rather
//           than being replaced by kernel->window(), so a caller-provided window
//           is respected
// tensors - tensor pack forwarded unchanged to schedule_common()
void ACLScheduler::schedule_op(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors) {
    schedule_common(kernel, hints, window, tensors);
}

// Runs every workload through InferenceEngine::parallel_for, one invocation per
// workload index. Each invocation receives a ThreadInfo whose thread_id is the
// workload index and whose num_threads is parallel_get_num_threads().
void ACLScheduler::run_workloads(std::vector<arm_compute::IScheduler::Workload> &workloads) {
    const auto execute_workload = [&] (int workload_idx) {
        ThreadInfo thread_info;
        thread_info.cpu_info = &cpu_info();
        thread_info.num_threads = parallel_get_num_threads();
        thread_info.thread_id = workload_idx;
        workloads[workload_idx](thread_info);
    };
    InferenceEngine::parallel_for(workloads.size(), execute_workload);
}

} // namespace intel_cpu
} // namespace ov
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Copyright (C) 2020-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <arm_compute/runtime/Scheduler.h>
#include <arm_compute/core/CPP/ICPPKernel.h>
#include <arm_compute/core/ITensorPack.h>

namespace ov {
namespace intel_cpu {

using namespace arm_compute;

class ACLScheduler final : public IScheduler {
public:
ACLScheduler();
~ACLScheduler() override = default;
std::uint32_t num_threads() const override;
void set_num_threads(unsigned int num_threads) override;
void schedule(ICPPKernel *kernel, const Hints &hints) override;
void schedule_op(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors) override;
protected:
void run_workloads(std::vector<Workload> &workloads) override;
};
} // namespace intel_cpu
} // namespace ov
9 changes: 9 additions & 0 deletions src/plugins/intel_cpu/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@
#include <cpu/x64/cpu_isa_traits.hpp>
#include <itt.h>

#if defined(OV_CPU_WITH_ACL)
#include "nodes/executors/acl/acl_ie_scheduler.hpp"
#include "arm_compute/runtime/CPP/CPPScheduler.h"
#endif

using namespace InferenceEngine;

#define IE_CPU_PLUGIN_THROW(...) IE_THROW(__VA_ARGS__) << "CPU plugin: "
Expand Down Expand Up @@ -142,6 +147,10 @@ Engine::Engine() :
specialSetup(new CPUSpecialSetup) {
_pluginName = "CPU";
extensionManager->AddExtension(std::make_shared<Extension>());
#if defined(OV_CPU_WITH_ACL)
acl_scheduler = std::make_unique<ACLScheduler>();
arm_compute::Scheduler::set(acl_scheduler);
#endif
}

Engine::~Engine() {
Expand Down
4 changes: 4 additions & 0 deletions src/plugins/intel_cpu/src/plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ class Engine : public InferenceEngine::IInferencePlugin {
const std::string deviceFullName;

std::shared_ptr<void> specialSetup;

#if defined(OV_CPU_WITH_ACL)
std::shared_ptr<arm_compute::IScheduler> acl_scheduler;
#endif
};

} // namespace intel_cpu
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,16 +196,6 @@ std::vector<std::string> disabledTestPatterns() {
retVector.emplace_back(R"(.*HeteroSyntheticTest.*)");
retVector.emplace_back(R"(.*IEClassBasicTestP.*)");
#elif defined(OPENVINO_ARCH_ARM64) || defined(OPENVINO_ARCH_ARM)
{
// TODO: enable once streams / tput mode is supported
retVector.emplace_back(R"(OVClassConfigTestCPU.smoke_CpuExecNetworkCheck(Model|Core)StreamsHasHigherPriorityThanLatencyHint.*)");
retVector.emplace_back(R"(smoke_BehaviorTests/CorrectConfigCheck.canSetConfigAndCheckGetConfig.*CPU_THROUGHPUT_STREAMS=8.*)");
retVector.emplace_back(R"(smoke_BehaviorTests/CorrectConfigCheck.canSetConfigTwiceAndCheckGetConfig.*CPU_THROUGHPUT_STREAMS=8.*)");
retVector.emplace_back(R"(smoke_CPU_OVClassLoadNetworkAndCheckWithSecondaryPropertiesTest/OVClassLoadNetworkAndCheckSecondaryPropertiesTest.LoadNetworkAndCheckSecondaryPropertiesTest.*)");
retVector.emplace_back(R"(smoke_CPU_OVClassLoadNetworkAndCheckWithSecondaryPropertiesDoubleTest/OVClassLoadNetworkAndCheckSecondaryPropertiesTest.LoadNetworkAndCheckSecondaryPropertiesTest.*)");
retVector.emplace_back(R"(smoke_CPU_OVClassCompileModelAndCheckSecondaryPropertiesTest.*)");
retVector.emplace_back(R"(smoke_CPU_OVClassCompileModelAndCheckWithSecondaryPropertiesDoubleTest.*)");
}
// invalid test: checks u8 precision for runtime graph, while it should be f32
retVector.emplace_back(R"(smoke_NegativeQuantizedMatMulMultiplyFusion.*)");
// int8 specific
Expand Down