From e35acf91e9a953ee081d0bae355a7e848ef41b86 Mon Sep 17 00:00:00 2001
From: Attila Csok
Date: Mon, 5 Aug 2024 15:45:52 +0300
Subject: [PATCH] [intel-npu] Adding NPU_TURBO option to plugin (#25646)

### Details:
- Adding the npu_turbo option to the intel-npu plugin
- Updating the documentation with turbo and other missing properties

Master backport of https://github.com/openvinotoolkit/openvino/pull/25603

### Tickets:
- [*ticket-id*](https://jira.devtools.intel.com/browse/CVS-147038)
---
 .../npu-device.rst                            |  1 +
 .../openvino/runtime/intel_npu/properties.hpp | 11 +++++
 src/plugins/intel_npu/README.md               | 40 +++++++++++++++
 .../include/intel_npu/al/config/runtime.hpp   | 17 +++++++
 src/plugins/intel_npu/src/al/include/npu.hpp  |  2 +-
 .../intel_npu/src/al/src/config/runtime.cpp   |  1 +
 .../src/backend/include/zero_backend.hpp      |  2 +-
 .../src/backend/src/zero_backend.cpp          |  2 +-
 .../src/backend/src/zero_wrappers.cpp         |  9 ++++
 .../compiler/src/zero_compiler_in_driver.cpp  |  4 ++
 .../intel_npu/src/plugin/include/backends.hpp |  2 +-
 .../intel_npu/src/plugin/src/backends.cpp     |  4 +-
 .../src/plugin/src/compiled_model.cpp         |  6 +++
 .../intel_npu/src/plugin/src/plugin.cpp       |  8 ++-
 .../ov_infer_request/compile_and_infer.cpp    | 34 +++++++++++++
 .../internal/overload/compile_and_infer.hpp   | 49 +++++++++++++++++--
 .../ov_infer_request/compile_and_infer.cpp    | 22 ---------
 .../intel_npu/thirdparty/level-zero-ext       |  2 +-
 18 files changed, 182 insertions(+), 34 deletions(-)
 create mode 100644 src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/compile_and_infer.cpp
 delete mode 100644 src/plugins/intel_npu/tests/functional/shared_tests_instances/behavior/ov_infer_request/compile_and_infer.cpp

diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst
index 7ac982e37f6716..cd05280dab825a 100644
--- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst
+++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst
@@ -139,6 +139,7 @@ offer a limited set of supported OpenVINO features.
        ov::enable_profiling
        ov::workload_type
        ov::intel_npu::compilation_mode_params
+       ov::intel_npu::turbo

 .. tab-item:: Read-only properties

diff --git a/src/inference/include/openvino/runtime/intel_npu/properties.hpp b/src/inference/include/openvino/runtime/intel_npu/properties.hpp
index 37db91c1188906..2c9d15152b4239 100644
--- a/src/inference/include/openvino/runtime/intel_npu/properties.hpp
+++ b/src/inference/include/openvino/runtime/intel_npu/properties.hpp
@@ -61,5 +61,16 @@ static constexpr ov::Property driver_versi
  */
 static constexpr ov::Property<std::string> compilation_mode_params{"NPU_COMPILATION_MODE_PARAMS"};

+/**
+ * @brief [Only for NPU plugin]
+ * Type: bool
+ * Set turbo on or off. Turbo mode, where available, provides a hint to the system to maintain the
+ * maximum NPU frequency and memory throughput within the platform TDP limits.
+ * Turbo mode is not recommended for sustained workloads, due to higher power consumption and potential impact on other
+ * compute resources.
+ * @ingroup ov_runtime_npu_prop_cpp_api
+ */
+static constexpr ov::Property<bool> turbo{"NPU_TURBO"};
+
 }  // namespace intel_npu
 }  // namespace ov
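A minimal usage sketch for the new property (the model path, device string, and surrounding scaffolding are illustrative; only `ov::intel_npu::turbo` comes from this patch):

```cpp
#include "openvino/runtime/core.hpp"
#include "openvino/runtime/intel_npu/properties.hpp"

int main() {
    ov::Core core;
    // Placeholder model; any model compilable for the NPU works here.
    auto model = core.read_model("model.xml");
    // Hint the driver to hold maximum NPU frequency for this compiled model.
    auto compiled = core.compile_model(model, "NPU", ov::intel_npu::turbo(true));
    return 0;
}
```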
diff --git a/src/plugins/intel_npu/README.md b/src/plugins/intel_npu/README.md
index 6666db53e13271..e30480ecca780e 100644
--- a/src/plugins/intel_npu/README.md
+++ b/src/plugins/intel_npu/README.md
@@ -166,6 +166,14 @@ The following properties are supported:

 | `ov::device::architecture`/
`DEVICE_ARCHITECTURE` | RO | Returns the platform information. | `N/A`| `N/A` |
 | `ov::device::full_name`/
`FULL_DEVICE_NAME` | RO | Returns the full name of the NPU device. | `N/A`| `N/A` | | `ov::internal::exclusive_async_requests`/
`EXCLUSIVE_ASYNC_REQUESTS` | RW | Allows using an exclusive task executor for asynchronous infer requests. | `YES`/ `NO`| `NO` |
+| `ov::device::type`/
`DEVICE_TYPE` | RO | Returns the type of the device: discrete or integrated. | `DISCRETE` /
`INTEGRATED` | `N/A` | +| `ov::device::gops`/
`DEVICE_GOPS` | RO | Returns the Giga OPS per second count (GFLOPS or GIOPS) for a set of precisions supported by the specified device. | `N/A`| `N/A` |
+| `ov::device::pci_info`/
`DEVICE_PCI_INFO` | RO | Returns the PCI bus information of the device. See the PCIInfo struct definition for details. | `N/A`| `N/A` |
+| `ov::intel_npu::device_alloc_mem_size`/
`NPU_DEVICE_ALLOC_MEM_SIZE` | RO | Size of the already allocated NPU DDR memory (for both discrete and integrated NPU devices). | `N/A` | `N/A` |
+| `ov::intel_npu::device_total_mem_size`/
`NPU_DEVICE_TOTAL_MEM_SIZE` | RO | Size of the available NPU DDR memory (for both discrete and integrated NPU devices). | `N/A` | `N/A` |
+| `ov::intel_npu::driver_version`/
`NPU_DRIVER_VERSION` | RO | NPU driver version (for both discrete and integrated NPU devices). | `N/A` | `N/A` |
+| `ov::intel_npu::compilation_mode_params`/
`NPU_COMPILATION_MODE_PARAMS` | RW | Sets various parameters supported by the NPU compiler (see below). | `<std::string>`| `N/A` |
+| `ov::intel_npu::turbo`/
`NPU_TURBO` | RW | Sets Turbo mode on/off. | `YES`/ `NO`| `NO` |
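As an illustrative sketch (not part of this change), the read-only entries above can be queried as follows; the `"NPU"` device string assumes an NPU is present:

```cpp
#include <iostream>
#include "openvino/runtime/core.hpp"
#include "openvino/runtime/intel_npu/properties.hpp"

int main() {
    ov::Core core;
    // Both properties are read-only and typed, so no manual Any conversion is needed.
    auto total_mem = core.get_property("NPU", ov::intel_npu::device_total_mem_size);
    auto driver = core.get_property("NPU", ov::intel_npu::driver_version);
    std::cout << "NPU DDR size: " << total_mem << " bytes, driver version: " << driver << "\n";
    return 0;
}
```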

### Performance Hint: Default Number of DPU Groups / DMA Engines

@@ -192,6 +200,38 @@ The following table shows the optimal number of inference requests returned by t
 | 3720 | 4 | 1 |
 | 4000 | 8 | 1 |

+
+### Compilation mode parameters
+``ov::intel_npu::compilation_mode_params`` is an NPU-specific property that allows control over model compilation for the NPU.
+Note: This functionality is currently experimental; it may be deprecated or replaced with a generic OpenVINO API in future releases.
+
+The following configuration options are supported:
+
+#### optimization-level
+Defines a preset of optimization passes to be applied during compilation. Supported values:
+
+| Value | Description |
+| :--- | :--- |
+| 0 | Reduced subset of optimization passes. Shorter compile time. |
+| 1 | Default. Balanced performance/compile time. |
+| 2 | Prioritizes performance over compile time; compilation may take noticeably longer. |
+
+#### performance-hint-override
+An extension for the LATENCY mode specified via ``ov::hint::performance_mode``.
+Has no effect for other ``ov::hint::PerformanceMode`` hints.
+
+Supported values:
+
+| Value | Description |
+| :--- | :--- |
+| efficiency | Default. Balanced performance and power consumption. |
+| latency | Prioritizes performance over power efficiency. |
+
+#### Usage example:
+```
+ov::AnyMap config = {{ov::intel_npu::compilation_mode_params.name(),
+                      ov::Any("optimization-level=1 performance-hint-override=latency")}};
+auto compiled_model = core.compile_model(model, "NPU", config);
+```
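Because `performance-hint-override` only takes effect together with the LATENCY performance mode, a sketch of the combination (the model and surrounding code are placeholders; the property names are from OpenVINO's public API):

```cpp
#include "openvino/runtime/core.hpp"
#include "openvino/runtime/intel_npu/properties.hpp"

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder model
    // The override below is ignored unless the LATENCY performance mode is active.
    auto compiled = core.compile_model(model, "NPU",
        ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY),
        ov::intel_npu::compilation_mode_params("performance-hint-override=latency"));
    return 0;
}
```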

 ## Stateful models

diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/al/config/runtime.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/al/config/runtime.hpp
index cf3be645c470c7..c261a420a624ea 100644
--- a/src/plugins/intel_npu/src/al/include/intel_npu/al/config/runtime.hpp
+++ b/src/plugins/intel_npu/src/al/include/intel_npu/al/config/runtime.hpp
@@ -204,4 +204,21 @@ struct WORKLOAD_TYPE final : OptionBase<WORKLOAD_TYPE, ov::WorkloadType> {
     static std::string toString(const ov::WorkloadType& val);
 };
+
+//
+// TURBO
+//
+struct TURBO final : OptionBase<TURBO, bool> {
+    static std::string_view key() {
+        return ov::intel_npu::turbo.name();
+    }
+
+    static bool defaultValue() {
+        return false;
+    }
+
+    static OptionMode mode() {
+        return OptionMode::RunTime;
+    }
+};
 }  // namespace intel_npu

diff --git a/src/plugins/intel_npu/src/al/include/npu.hpp b/src/plugins/intel_npu/src/al/include/npu.hpp
index 5d46ae3ae2a4ac..f2d4006368af84 100644
--- a/src/plugins/intel_npu/src/al/include/npu.hpp
+++ b/src/plugins/intel_npu/src/al/include/npu.hpp
@@ -38,7 +38,7 @@ class IEngineBackend : public std::enable_shared_from_this<IEngineBackend> {
     /** @brief Backend has support for concurrency batching */
     virtual bool isBatchingSupported() const = 0;
     /** @brief Backend has support for workload type */
-    virtual bool isWorkloadTypeSupported() const = 0;
+    virtual bool isCommandQueueExtSupported() const = 0;
     /** @brief Register backend-specific options */
     virtual void registerOptions(OptionsDesc& options) const;
     /** @brief Get Level Zero context*/

diff --git a/src/plugins/intel_npu/src/al/src/config/runtime.cpp b/src/plugins/intel_npu/src/al/src/config/runtime.cpp
index 8dd7e3b4b58bec..80c6aaa3f5ca64 100644
--- a/src/plugins/intel_npu/src/al/src/config/runtime.cpp
+++ b/src/plugins/intel_npu/src/al/src/config/runtime.cpp
@@ -24,6 +24,7 @@ void intel_npu::registerRunTimeOptions(OptionsDesc& desc) {
     desc.add();
     desc.add();
     desc.add();
+    desc.add<TURBO>();
 }

 // Heuristically obtained number. Varies depending on the values of PLATFORM and PERFORMANCE_HINT

diff --git a/src/plugins/intel_npu/src/backend/include/zero_backend.hpp b/src/plugins/intel_npu/src/backend/include/zero_backend.hpp
index d75af35dff9f8a..52745c962370fa 100644
--- a/src/plugins/intel_npu/src/backend/include/zero_backend.hpp
+++ b/src/plugins/intel_npu/src/backend/include/zero_backend.hpp
@@ -26,7 +26,7 @@ class ZeroEngineBackend final : public IEngineBackend {
     uint32_t getDriverExtVersion() const override;

     bool isBatchingSupported() const override;
-    bool isWorkloadTypeSupported() const override;
+    bool isCommandQueueExtSupported() const override;

     void* getContext() const override;

diff --git a/src/plugins/intel_npu/src/backend/src/zero_backend.cpp b/src/plugins/intel_npu/src/backend/src/zero_backend.cpp
index fb581650fd996f..761fb41504d2d5 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_backend.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_backend.cpp
@@ -34,7 +34,7 @@ bool ZeroEngineBackend::isBatchingSupported() const {
     return _instance->getDriverExtVersion() >= ZE_GRAPH_EXT_VERSION_1_6;
 }

-bool ZeroEngineBackend::isWorkloadTypeSupported() const {
+bool ZeroEngineBackend::isCommandQueueExtSupported() const {
     return _instance->getCommandQueueDdiTable() != nullptr;
 }

diff --git a/src/plugins/intel_npu/src/backend/src/zero_wrappers.cpp b/src/plugins/intel_npu/src/backend/src/zero_wrappers.cpp
index 2cd249aad19a92..7ee49a72e38d8e 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_wrappers.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_wrappers.cpp
@@ -140,6 +140,15 @@ CommandQueue::CommandQueue(const ze_device_handle_t& device_handle,
       _log("CommandQueue", config.get<LOG_LEVEL>()) {
     ze_command_queue_desc_t queue_desc =
         {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, nullptr, group_ordinal, 0, 0, ZE_COMMAND_QUEUE_MODE_DEFAULT, priority};
+    if (config.has<TURBO>()) {
+        if (_command_queue_npu_dditable_ext != nullptr) {
+            bool turbo = config.get<TURBO>();
+            ze_command_queue_desc_npu_ext_t turbo_cfg = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC_NPU_EXT, nullptr, turbo};
+            queue_desc.pNext = &turbo_cfg;
+        } else {
+            OPENVINO_THROW("Turbo is not supported by the current driver");
+        }
+    }
     zeroUtils::throwOnFail("zeCommandQueueCreate",
                            zeCommandQueueCreate(_context, device_handle, &queue_desc, &_handle));
 }

diff --git a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp
index f494f2e7c17e83..c61d5ab0760ac3 100644
--- a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp
+++ b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp
@@ -512,6 +512,10 @@ std::string LevelZeroCompilerInDriver<TableExtension>::serializeConfig(
     std::ostringstream workloadtypestr;
     workloadtypestr << ov::workload_type.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER << "\\S+" << VALUE_DELIMITER;
     content = std::regex_replace(content, std::regex(workloadtypestr.str()), "");
+    // Remove the turbo property, as it is not used by the compiler
+    std::ostringstream turbostring;
+    turbostring << ov::intel_npu::turbo.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER << "\\S+" << VALUE_DELIMITER;
+    content = std::regex_replace(content, std::regex(turbostring.str()), "");

     // FINAL step to convert prefixes of remaining params, to ensure backwards compatibility
     // From 5.0.0, driver compiler start to use NPU_ prefix, the old version uses VPU_ prefix

diff --git a/src/plugins/intel_npu/src/plugin/include/backends.hpp b/src/plugins/intel_npu/src/plugin/include/backends.hpp
index 
e6e1668a65c6dc..e27471ea77b8c9 100644
--- a/src/plugins/intel_npu/src/plugin/include/backends.hpp
+++ b/src/plugins/intel_npu/src/plugin/include/backends.hpp
@@ -32,7 +32,7 @@ class NPUBackends final {
     uint32_t getDriverVersion() const;
     uint32_t getDriverExtVersion() const;
     bool isBatchingSupported() const;
-    bool isWorkloadTypeSupported() const;
+    bool isCommandQueueExtSupported() const;
     void registerOptions(OptionsDesc& options) const;
     void* getContext() const;
     std::string getCompilationPlatform(const std::string_view platform, const std::string& deviceId) const;

diff --git a/src/plugins/intel_npu/src/plugin/src/backends.cpp b/src/plugins/intel_npu/src/plugin/src/backends.cpp
index eeb88d1c596786..d5e6da4370ac1d 100644
--- a/src/plugins/intel_npu/src/plugin/src/backends.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/backends.cpp
@@ -163,9 +163,9 @@ bool NPUBackends::isBatchingSupported() const {
     return false;
 }

-bool NPUBackends::isWorkloadTypeSupported() const {
+bool NPUBackends::isCommandQueueExtSupported() const {
     if (_backend != nullptr) {
-        return _backend->isWorkloadTypeSupported();
+        return _backend->isCommandQueueExtSupported();
     }

     return false;

diff --git a/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp
index 2dfe0b5bb59650..0f0ef23dfcbc11 100644
--- a/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp
@@ -328,6 +328,12 @@ void CompiledModel::initialize_properties() {
           [](const Config& config) {
               return config.get();
           }}},
+         {ov::intel_npu::turbo.name(),
+          {isPropertySupported(ov::intel_npu::turbo.name()),
+           ov::PropertyMutability::RO,
+           [](const Config& config) {
+               return config.get<TURBO>();
+           }}},
          // NPU Private
          // =========
          {ov::intel_npu::tiles.name(),

diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
index 98f037f7a47271..f18fc6aa13cf5f 100644
--- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -299,7 +299,7 @@ Plugin::Plugin()
           return _metrics->GetAvailableDevicesNames();
       }}},
      {ov::workload_type.name(),
-      {_backends->isWorkloadTypeSupported(),
+      {_backends->isCommandQueueExtSupported(),
        ov::PropertyMutability::RW,
        [](const Config& config) {
            return config.get();
@@ -440,6 +440,12 @@ Plugin::Plugin()
       [](const Config& config) {
           return config.get();
       }}},
+     {ov::intel_npu::turbo.name(),
+      {_backends->isCommandQueueExtSupported(),
+       ov::PropertyMutability::RW,
+       [](const Config& config) {
+           return config.get<TURBO>();
+       }}},
      // NPU Private
      // =========
      {ov::intel_npu::dma_engines.name(),

diff --git a/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/compile_and_infer.cpp b/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/compile_and_infer.cpp
new file mode 100644
index 00000000000000..b26d0fc621a9db
--- /dev/null
+++ b/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/compile_and_infer.cpp
@@ -0,0 +1,34 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "overload/compile_and_infer.hpp"
+
+#include <vector>
+
+#include "common/npu_test_env_cfg.hpp"
+#include "common/utils.hpp"
+
+namespace {
+
+using namespace ov::test::behavior;
+
+const std::vector<ov::AnyMap> configs = {{}};
+
+INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTests,
+                         OVCompileAndInferRequest,
+                         ::testing::Combine(::testing::Values(getConstantGraph(ov::element::f32)),
+                                            ::testing::Values(ov::test::utils::DEVICE_NPU),
+                                            ::testing::ValuesIn(configs)),
+                         ov::test::utils::appendPlatformTypeTestName<OVCompileAndInferRequest>);
+
+INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTests,
+                         OVCompileAndInferRequestTurbo,
+                         ::testing::Combine(::testing::Values(getConstantGraph(ov::element::f32)),
+                                            ::testing::Values(ov::test::utils::DEVICE_NPU),
+                                            ::testing::ValuesIn(std::vector<ov::AnyMap>{
+                                                {ov::intel_npu::create_executor(0)},
+                                                {ov::intel_npu::create_executor(1)}})),
+                         ov::test::utils::appendPlatformTypeTestName<OVCompileAndInferRequestTurbo>);
+
+}  // namespace
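The tests below imply an application-side guard: check `supported_properties` before enabling turbo. A sketch of that guard (the model and scaffolding are placeholders; only the property and the error substring come from this patch):

```cpp
#include <algorithm>
#include <string>
#include "openvino/runtime/core.hpp"
#include "openvino/runtime/intel_npu/properties.hpp"

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder model
    auto props = core.get_property("NPU", ov::supported_properties);
    // NPU_TURBO is only advertised when the driver's command-queue extension is present;
    // otherwise compilation or infer-request creation throws
    // "Turbo is not supported by the current driver".
    const std::string turbo_name = ov::intel_npu::turbo.name();
    bool turbo_supported = std::find(props.begin(), props.end(), turbo_name) != props.end();
    ov::AnyMap config;
    if (turbo_supported) {
        config[turbo_name] = true;
    }
    auto compiled = core.compile_model(model, "NPU", config);
    return 0;
}
```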
diff --git a/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp b/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp
index 52091a4743fb75..d35f0b393c02e5 100644
--- a/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp
+++ b/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp
@@ -33,7 +33,7 @@ inline std::shared_ptr<ov::Model> getConstantGraph(element::Type type) {
     return std::make_shared<ov::Model>(results, params);
 }

-inline bool isWorkloadTypeSupported() {
+inline bool isCommandQueueExtSupported() {
     return std::make_shared<::intel_npu::ZeroInitStructsHolder>()->getCommandQueueDdiTable() != nullptr;
 }

@@ -100,7 +100,7 @@ TEST_P(OVCompileAndInferRequest, PluginWorkloadType) {
         return property == workload_type.name();
     });

-    if (isWorkloadTypeSupported()) {
+    if (isCommandQueueExtSupported()) {
         ASSERT_TRUE(workloadTypeSupported);
         ov::InferRequest req;
         OV_ASSERT_NO_THROW(execNet = core->compile_model(function, target_device, configuration));
@@ -137,7 +137,7 @@ TEST_P(OVCompileAndInferRequest, CompiledModelWorkloadType) {
         return property == workload_type.name();
     });

-    if (isWorkloadTypeSupported()) {
+    if (isCommandQueueExtSupported()) {
         ASSERT_TRUE(workloadTypeSupported);
         OV_ASSERT_NO_THROW(execNet.set_property(modelConfiguration));
         ov::InferRequest req;
@@ -165,7 +165,7 @@ TEST_P(OVCompileAndInferRequest, CompiledModelWorkloadTypeDelayedExecutor) {
     modelConfiguration[workload_type.name()] = WorkloadType::DEFAULT;
     OV_ASSERT_NO_THROW(execNet.set_property(modelConfiguration));

-    if (isWorkloadTypeSupported()) {
+    if (isCommandQueueExtSupported()) {
         ov::InferRequest req;
         OV_ASSERT_NO_THROW(req = execNet.create_infer_request());
         bool is_called = false;
@@ -183,6 +183,47 @@ TEST_P(OVCompileAndInferRequest, CompiledModelWorkloadTypeDelayedExecutor) {
     }
 }

+using OVCompileAndInferRequestTurbo = OVCompileAndInferRequest;
+
+TEST_P(OVCompileAndInferRequestTurbo, CompiledModelTurbo) {
+    configuration[intel_npu::turbo.name()] = true;
+
+    auto supportedProperties = core->get_property("NPU", supported_properties.name()).as<std::vector<PropertyName>>();
+    bool isTurboSupported =
+        std::any_of(supportedProperties.begin(), supportedProperties.end(), [](const PropertyName& property) {
+            return property == intel_npu::turbo.name();
+        });
+
+    if (isCommandQueueExtSupported()) {
+        ASSERT_TRUE(isTurboSupported);
+        OV_ASSERT_NO_THROW(execNet = core->compile_model(function, target_device, configuration));
+        auto turbosetting_compiled_model = execNet.get_property(intel_npu::turbo.name());
+        OV_ASSERT_NO_THROW(turbosetting_compiled_model = true);
+        ov::InferRequest req;
+        OV_ASSERT_NO_THROW(req = execNet.create_infer_request());
+        bool is_called = false;
+        OV_ASSERT_NO_THROW(req.set_callback([&](std::exception_ptr exception_ptr) {
+            ASSERT_EQ(exception_ptr, nullptr);
+            is_called = true;
+        }));
+        OV_ASSERT_NO_THROW(req.start_async());
+        OV_ASSERT_NO_THROW(req.wait());
+        ASSERT_TRUE(is_called);
+    } else {
+        auto cr_ex = configuration.find(intel_npu::create_executor.name());
+        if (cr_ex->second.as<int64_t>() == 1) {
+            OV_EXPECT_THROW_HAS_SUBSTRING(core->compile_model(function, target_device, configuration),
+                                          ov::Exception,
+                                          "Turbo is not supported by the current driver");
+        } else {
+            OV_ASSERT_NO_THROW(execNet = core->compile_model(function, target_device, configuration));
+            OV_EXPECT_THROW_HAS_SUBSTRING(execNet.create_infer_request(),
+                                          ov::Exception,
+                                          "Turbo is not supported by the current driver");
+        }
+    }
+}
+
 }  // namespace behavior
 }  // namespace test
 }  // namespace ov

diff --git a/src/plugins/intel_npu/tests/functional/shared_tests_instances/behavior/ov_infer_request/compile_and_infer.cpp b/src/plugins/intel_npu/tests/functional/shared_tests_instances/behavior/ov_infer_request/compile_and_infer.cpp
deleted file mode 100644
index eb7cf591e328a0..00000000000000
--- a/src/plugins/intel_npu/tests/functional/shared_tests_instances/behavior/ov_infer_request/compile_and_infer.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright (C) 2018-2024 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "overload/compile_and_infer.hpp"
-#include <vector>
-#include "common/utils.hpp"
-#include "common/npu_test_env_cfg.hpp"
-
-namespace {
-
-using namespace ov::test::behavior;
-
-const std::vector<ov::AnyMap> configs = {{}};
-
-INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTests, OVCompileAndInferRequest,
-                         ::testing::Combine(::testing::Values(getConstantGraph(ov::element::f32)),
-                                            ::testing::Values(ov::test::utils::DEVICE_NPU),
-                                            ::testing::ValuesIn(configs)),
-                         ov::test::utils::appendPlatformTypeTestName<OVCompileAndInferRequest>);
-
-}  // namespace

diff --git a/src/plugins/intel_npu/thirdparty/level-zero-ext b/src/plugins/intel_npu/thirdparty/level-zero-ext
index 518d64125521cd..16c85231a82ee1 160000
--- a/src/plugins/intel_npu/thirdparty/level-zero-ext
+++ b/src/plugins/intel_npu/thirdparty/level-zero-ext
@@ -1 +1 @@
-Subproject commit 518d64125521cd0f8c98d65f9a0fb40013e95d15
+Subproject commit 16c85231a82ee1a0b06ed7ab7da3f411a0878ed7