From e35acf91e9a953ee081d0bae355a7e848ef41b86 Mon Sep 17 00:00:00 2001
From: Attila Csok
Date: Mon, 5 Aug 2024 15:45:52 +0300
Subject: [PATCH] [intel-npu] Adding NPU_TURBO option to plugin (#25646)

### Details:
- Adding the npu_turbo option to the intel-npu plugin
- Updating the documentation with turbo and other missing properties

Master backport of https://github.com/openvinotoolkit/openvino/pull/25603

### Tickets:
- [*ticket-id*](https://jira.devtools.intel.com/browse/CVS-147038)
---
 .../npu-device.rst                            |  1 +
 .../openvino/runtime/intel_npu/properties.hpp | 11 +++++
 src/plugins/intel_npu/README.md               | 40 +++++++++++++++
 .../include/intel_npu/al/config/runtime.hpp   | 17 +++++++
 src/plugins/intel_npu/src/al/include/npu.hpp  |  2 +-
 .../intel_npu/src/al/src/config/runtime.cpp   |  1 +
 .../src/backend/include/zero_backend.hpp      |  2 +-
 .../src/backend/src/zero_backend.cpp          |  2 +-
 .../src/backend/src/zero_wrappers.cpp         |  9 ++++
 .../compiler/src/zero_compiler_in_driver.cpp  |  4 ++
 .../intel_npu/src/plugin/include/backends.hpp |  2 +-
 .../intel_npu/src/plugin/src/backends.cpp     |  4 +-
 .../src/plugin/src/compiled_model.cpp         |  6 +++
 .../intel_npu/src/plugin/src/plugin.cpp       |  8 ++-
 .../ov_infer_request/compile_and_infer.cpp    | 34 +++++++++++++
 .../internal/overload/compile_and_infer.hpp   | 49 +++++++++++++++++--
 .../ov_infer_request/compile_and_infer.cpp    | 22 ---------
 .../intel_npu/thirdparty/level-zero-ext       |  2 +-
 18 files changed, 182 insertions(+), 34 deletions(-)
 create mode 100644 src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/compile_and_infer.cpp
 delete mode 100644 src/plugins/intel_npu/tests/functional/shared_tests_instances/behavior/ov_infer_request/compile_and_infer.cpp

diff --git a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst
index 7ac982e37f6716..cd05280dab825a 100644
--- a/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst
+++ b/docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst
@@ -139,6 +139,7 @@ offer a limited set of supported OpenVINO features.
        ov::enable_profiling
        ov::workload_type
        ov::intel_npu::compilation_mode_params
+       ov::intel_npu::turbo

 .. tab-item:: Read-only properties

diff --git a/src/inference/include/openvino/runtime/intel_npu/properties.hpp b/src/inference/include/openvino/runtime/intel_npu/properties.hpp
index 37db91c1188906..2c9d15152b4239 100644
--- a/src/inference/include/openvino/runtime/intel_npu/properties.hpp
+++ b/src/inference/include/openvino/runtime/intel_npu/properties.hpp
@@ -61,5 +61,16 @@ static constexpr ov::Property driver_versi
  */
 static constexpr ov::Property<std::string> compilation_mode_params{"NPU_COMPILATION_MODE_PARAMS"};

+/**
+ * @brief [Only for NPU plugin]
+ * Type: bool
+ * Set turbo on or off. Turbo mode, where available, provides a hint to the system to maintain the
+ * maximum NPU frequency and memory throughput within the platform TDP limits.
+ * Turbo mode is not recommended for sustained workloads, due to higher power consumption and potential impact on other
+ * compute resources.
+ * @ingroup ov_runtime_npu_prop_cpp_api
+ */
+static constexpr ov::Property<bool> turbo{"NPU_TURBO"};
+
 }  // namespace intel_npu
 }  // namespace ov
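A minimal usage sketch for the new property (the model path, device string, and surrounding scaffolding are illustrative; only `ov::intel_npu::turbo` comes from this patch):

```cpp
#include "openvino/runtime/core.hpp"
#include "openvino/runtime/intel_npu/properties.hpp"

int main() {
    ov::Core core;
    // Placeholder model; any model compilable for the NPU works here.
    auto model = core.read_model("model.xml");
    // Hint the driver to hold maximum NPU frequency for this compiled model.
    auto compiled = core.compile_model(model, "NPU", ov::intel_npu::turbo(true));
    return 0;
}
```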
diff --git a/src/plugins/intel_npu/README.md b/src/plugins/intel_npu/README.md
index 6666db53e13271..e30480ecca780e 100644
--- a/src/plugins/intel_npu/README.md
+++ b/src/plugins/intel_npu/README.md
@@ -166,6 +166,14 @@ The following properties are supported:

 | `ov::device::architecture`/
`DEVICE_ARCHITECTURE` | RO | Returns the platform information. | `N/A`| `N/A` |
 | `ov::device::full_name`/
`FULL_DEVICE_NAME` | RO | Returns the full name of the NPU device. | `N/A`| `N/A` | | `ov::internal::exclusive_async_requests`/
`EXCLUSIVE_ASYNC_REQUESTS` | RW | Allows using an exclusive task executor for asynchronous infer requests. | `YES`/ `NO`| `NO` |
+| `ov::device::type`/
`DEVICE_TYPE` | RO | Returns the type of the device: discrete or integrated. | `DISCRETE` /
`INTEGRATED` | `N/A` | +| `ov::device::gops`/
`DEVICE_GOPS` | RO | Returns the Giga OPS per second count (GFLOPS or GIOPS) for a set of precisions supported by the specified device. | `N/A`| `N/A` |
+| `ov::device::pci_info`/
`DEVICE_PCI_INFO` | RO | Returns the PCI bus information of the device. See the PCIInfo struct definition for details. | `N/A`| `N/A` |
+| `ov::intel_npu::device_alloc_mem_size`/
`NPU_DEVICE_ALLOC_MEM_SIZE` | RO | Size of the already allocated NPU DDR memory (for both discrete and integrated NPU devices). | `N/A` | `N/A` |
+| `ov::intel_npu::device_total_mem_size`/
`NPU_DEVICE_TOTAL_MEM_SIZE` | RO | Size of the available NPU DDR memory (for both discrete and integrated NPU devices). | `N/A` | `N/A` |
+| `ov::intel_npu::driver_version`/
`NPU_DRIVER_VERSION` | RO | NPU driver version (for both discrete and integrated NPU devices). | `N/A` | `N/A` |
+| `ov::intel_npu::compilation_mode_params`/
`NPU_COMPILATION_MODE_PARAMS` | RW | Sets various parameters supported by the NPU compiler (see below). | `<std::string>`| `N/A` |
+| `ov::intel_npu::turbo`/
`NPU_TURBO` | RW | Sets Turbo mode on/off. | `YES`/ `NO`| `NO` |
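As an illustrative sketch (not part of this change), the read-only entries above can be queried as follows; the `"NPU"` device string assumes an NPU is present:

```cpp
#include <iostream>
#include "openvino/runtime/core.hpp"
#include "openvino/runtime/intel_npu/properties.hpp"

int main() {
    ov::Core core;
    // Both properties are read-only and typed, so no manual Any conversion is needed.
    auto total_mem = core.get_property("NPU", ov::intel_npu::device_total_mem_size);
    auto driver = core.get_property("NPU", ov::intel_npu::driver_version);
    std::cout << "NPU DDR size: " << total_mem << " bytes, driver version: " << driver << "\n";
    return 0;
}
```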

### Performance Hint: Default Number of DPU Groups / DMA Engines

@@ -192,6 +200,38 @@ The following table shows the optimal number of inference requests returned by t
 | 3720 | 4 | 1 |
 | 4000 | 8 | 1 |

+
+### Compilation mode parameters
+``ov::intel_npu::compilation_mode_params`` is an NPU-specific property that allows control over model compilation for the NPU.
+Note: This functionality is currently experimental; it may be deprecated or replaced with a generic OpenVINO API in future releases.
+
+The following configuration options are supported:
+
+#### optimization-level
+Defines a preset of optimization passes to be applied during compilation. Supported values:
+
+| Value | Description |
+| :--- | :--- |
+| 0 | Reduced subset of optimization passes. Shorter compile time. |
+| 1 | Default. Balanced performance/compile time. |
+| 2 | Prioritizes performance over compile time; compilation may take noticeably longer. |
+
+#### performance-hint-override
+An extension for the LATENCY mode specified via ``ov::hint::performance_mode``.
+Has no effect for other ``ov::hint::PerformanceMode`` hints.
+
+Supported values:
+
+| Value | Description |
+| :--- | :--- |
+| efficiency | Default. Balanced performance and power consumption. |
+| latency | Prioritizes performance over power efficiency. |
+
+#### Usage example:
+```
+ov::AnyMap config = {{ov::intel_npu::compilation_mode_params.name(),
+                      ov::Any("optimization-level=1 performance-hint-override=latency")}};
+auto compiled_model = core.compile_model(model, "NPU", config);
+```
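Because `performance-hint-override` only takes effect together with the LATENCY performance mode, a sketch of the combination (the model and surrounding code are placeholders; the property names are from OpenVINO's public API):

```cpp
#include "openvino/runtime/core.hpp"
#include "openvino/runtime/intel_npu/properties.hpp"

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder model
    // The override below is ignored unless the LATENCY performance mode is active.
    auto compiled = core.compile_model(model, "NPU",
        ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY),
        ov::intel_npu::compilation_mode_params("performance-hint-override=latency"));
    return 0;
}
```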

 ## Stateful models

diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/al/config/runtime.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/al/config/runtime.hpp
index cf3be645c470c7..c261a420a624ea 100644
--- a/src/plugins/intel_npu/src/al/include/intel_npu/al/config/runtime.hpp
+++ b/src/plugins/intel_npu/src/al/include/intel_npu/al/config/runtime.hpp
@@ -204,4 +204,21 @@ struct WORKLOAD_TYPE final : OptionBase<WORKLOAD_TYPE, ov::WorkloadType> {
     static std::string toString(const ov::WorkloadType& val);
 };
+
+//
+// TURBO
+//
+struct TURBO final : OptionBase<TURBO, bool> {
+    static std::string_view key() {
+        return ov::intel_npu::turbo.name();
+    }
+
+    static bool defaultValue() {
+        return false;
+    }
+
+    static OptionMode mode() {
+        return OptionMode::RunTime;
+    }
+};
 }  // namespace intel_npu

diff --git a/src/plugins/intel_npu/src/al/include/npu.hpp b/src/plugins/intel_npu/src/al/include/npu.hpp
index 5d46ae3ae2a4ac..f2d4006368af84 100644
--- a/src/plugins/intel_npu/src/al/include/npu.hpp
+++ b/src/plugins/intel_npu/src/al/include/npu.hpp
@@ -38,7 +38,7 @@ class IEngineBackend : public std::enable_shared_from_this<IEngineBackend> {
     /** @brief Backend has support for concurrency batching */
     virtual bool isBatchingSupported() const = 0;
     /** @brief Backend has support for workload type */
-    virtual bool isWorkloadTypeSupported() const = 0;
+    virtual bool isCommandQueueExtSupported() const = 0;
     /** @brief Register backend-specific options */
     virtual void registerOptions(OptionsDesc& options) const;
     /** @brief Get Level Zero context*/

diff --git a/src/plugins/intel_npu/src/al/src/config/runtime.cpp b/src/plugins/intel_npu/src/al/src/config/runtime.cpp
index 8dd7e3b4b58bec..80c6aaa3f5ca64 100644
--- a/src/plugins/intel_npu/src/al/src/config/runtime.cpp
+++ b/src/plugins/intel_npu/src/al/src/config/runtime.cpp
@@ -24,6 +24,7 @@ void intel_npu::registerRunTimeOptions(OptionsDesc& desc) {
     desc.add();
     desc.add();
     desc.add();
+    desc.add<TURBO>();
 }

 // Heuristically obtained number. Varies depending on the values of PLATFORM and PERFORMANCE_HINT

diff --git a/src/plugins/intel_npu/src/backend/include/zero_backend.hpp b/src/plugins/intel_npu/src/backend/include/zero_backend.hpp
index d75af35dff9f8a..52745c962370fa 100644
--- a/src/plugins/intel_npu/src/backend/include/zero_backend.hpp
+++ b/src/plugins/intel_npu/src/backend/include/zero_backend.hpp
@@ -26,7 +26,7 @@ class ZeroEngineBackend final : public IEngineBackend {
     uint32_t getDriverExtVersion() const override;

     bool isBatchingSupported() const override;
-    bool isWorkloadTypeSupported() const override;
+    bool isCommandQueueExtSupported() const override;

     void* getContext() const override;

diff --git a/src/plugins/intel_npu/src/backend/src/zero_backend.cpp b/src/plugins/intel_npu/src/backend/src/zero_backend.cpp
index fb581650fd996f..761fb41504d2d5 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_backend.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_backend.cpp
@@ -34,7 +34,7 @@ bool ZeroEngineBackend::isBatchingSupported() const {
     return _instance->getDriverExtVersion() >= ZE_GRAPH_EXT_VERSION_1_6;
 }

-bool ZeroEngineBackend::isWorkloadTypeSupported() const {
+bool ZeroEngineBackend::isCommandQueueExtSupported() const {
     return _instance->getCommandQueueDdiTable() != nullptr;
 }

diff --git a/src/plugins/intel_npu/src/backend/src/zero_wrappers.cpp b/src/plugins/intel_npu/src/backend/src/zero_wrappers.cpp
index 2cd249aad19a92..7ee49a72e38d8e 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_wrappers.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_wrappers.cpp
@@ -140,6 +140,15 @@ CommandQueue::CommandQueue(const ze_device_handle_t& device_handle,
       _log("CommandQueue", config.get<LOG_LEVEL>()) {
     ze_command_queue_desc_t queue_desc =
         {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, nullptr, group_ordinal, 0, 0, ZE_COMMAND_QUEUE_MODE_DEFAULT, priority};
+    if (config.has<TURBO>()) {
+        if (_command_queue_npu_dditable_ext != nullptr) {
+            bool turbo = config.get<TURBO>();
+            ze_command_queue_desc_npu_ext_t turbo_cfg = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC_NPU_EXT, nullptr, turbo};
+            queue_desc.pNext = &turbo_cfg;
+        } else {
+            OPENVINO_THROW("Turbo is not supported by the current driver");
+        }
+    }
     zeroUtils::throwOnFail("zeCommandQueueCreate",
                            zeCommandQueueCreate(_context, device_handle, &queue_desc, &_handle));
 }

diff --git a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp
index f494f2e7c17e83..c61d5ab0760ac3 100644
--- a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp
+++ b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp
@@ -512,6 +512,10 @@ std::string LevelZeroCompilerInDriver<TableExtension>::serializeConfig(
     std::ostringstream workloadtypestr;
     workloadtypestr << ov::workload_type.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER << "\\S+" << VALUE_DELIMITER;
     content = std::regex_replace(content, std::regex(workloadtypestr.str()), "");
+    // Remove the turbo property, as it is not used by the compiler
+    std::ostringstream turbostring;
+    turbostring << ov::intel_npu::turbo.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER << "\\S+" << VALUE_DELIMITER;
+    content = std::regex_replace(content, std::regex(turbostring.str()), "");

     // FINAL step to convert prefixes of remaining params, to ensure backwards compatibility
     // From 5.0.0, driver compiler start to use NPU_ prefix, the old version uses VPU_ prefix

diff --git a/src/plugins/intel_npu/src/plugin/include/backends.hpp b/src/plugins/intel_npu/src/plugin/include/backends.hpp
index 
e6e1668a65c6dc..e27471ea77b8c9 100644
--- a/src/plugins/intel_npu/src/plugin/include/backends.hpp
+++ b/src/plugins/intel_npu/src/plugin/include/backends.hpp
@@ -32,7 +32,7 @@ class NPUBackends final {
     uint32_t getDriverVersion() const;
     uint32_t getDriverExtVersion() const;
     bool isBatchingSupported() const;
-    bool isWorkloadTypeSupported() const;
+    bool isCommandQueueExtSupported() const;
     void registerOptions(OptionsDesc& options) const;
     void* getContext() const;
     std::string getCompilationPlatform(const std::string_view platform, const std::string& deviceId) const;

diff --git a/src/plugins/intel_npu/src/plugin/src/backends.cpp b/src/plugins/intel_npu/src/plugin/src/backends.cpp
index eeb88d1c596786..d5e6da4370ac1d 100644
--- a/src/plugins/intel_npu/src/plugin/src/backends.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/backends.cpp
@@ -163,9 +163,9 @@ bool NPUBackends::isBatchingSupported() const {
     return false;
 }

-bool NPUBackends::isWorkloadTypeSupported() const {
+bool NPUBackends::isCommandQueueExtSupported() const {
     if (_backend != nullptr) {
-        return _backend->isWorkloadTypeSupported();
+        return _backend->isCommandQueueExtSupported();
     }

     return false;

diff --git a/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp
index 2dfe0b5bb59650..0f0ef23dfcbc11 100644
--- a/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp
@@ -328,6 +328,12 @@ void CompiledModel::initialize_properties() {
           [](const Config& config) {
               return config.get();
           }}},
+         {ov::intel_npu::turbo.name(),
+          {isPropertySupported(ov::intel_npu::turbo.name()),
+           ov::PropertyMutability::RO,
+           [](const Config& config) {
+               return config.get<TURBO>();
+           }}},
          // NPU Private
          // =========
          {ov::intel_npu::tiles.name(),

diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
index 98f037f7a47271..f18fc6aa13cf5f 100644
--- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -299,7 +299,7 @@ Plugin::Plugin()
           return _metrics->GetAvailableDevicesNames();
       }}},
      {ov::workload_type.name(),
-      {_backends->isWorkloadTypeSupported(),
+      {_backends->isCommandQueueExtSupported(),
        ov::PropertyMutability::RW,
        [](const Config& config) {
            return config.get();
@@ -440,6 +440,12 @@ Plugin::Plugin()
       [](const Config& config) {
           return config.get();
       }}},
+     {ov::intel_npu::turbo.name(),
+      {_backends->isCommandQueueExtSupported(),
+       ov::PropertyMutability::RW,
+       [](const Config& config) {
+           return config.get<TURBO>();
+       }}},
      // NPU Private
      // =========
      {ov::intel_npu::dma_engines.name(),

diff --git a/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/compile_and_infer.cpp b/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/compile_and_infer.cpp
new file mode 100644
index 00000000000000..b26d0fc621a9db
--- /dev/null
+++ b/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/compile_and_infer.cpp
@@ -0,0 +1,34 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "overload/compile_and_infer.hpp"
+
+#include <vector>
+
+#include "common/npu_test_env_cfg.hpp"
+#include "common/utils.hpp"
+
+namespace {
+
+using namespace ov::test::behavior;
+
+const std::vector<ov::AnyMap> configs = {{}};
+
+INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTests,
+                         OVCompileAndInferRequest,
+                         ::testing::Combine(::testing::Values(getConstantGraph(ov::element::f32)),
+                                            ::testing::Values(ov::test::utils::DEVICE_NPU),
+                                            ::testing::ValuesIn(configs)),
+                         ov::test::utils::appendPlatformTypeTestName<OVCompileAndInferRequest>);
+
+INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTests,
+                         OVCompileAndInferRequestTurbo,
+                         ::testing::Combine(::testing::Values(getConstantGraph(ov::element::f32)),
+                                            ::testing::Values(ov::test::utils::DEVICE_NPU),
+                                            ::testing::ValuesIn(std::vector<ov::AnyMap>{
+                                                {ov::intel_npu::create_executor(0)},
+                                                {ov::intel_npu::create_executor(1)}})),
+                         ov::test::utils::appendPlatformTypeTestName<OVCompileAndInferRequestTurbo>);
+
+}  // namespace
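The tests below imply an application-side guard: check `supported_properties` before enabling turbo. A sketch of that guard (the model and scaffolding are placeholders; only the property and the error substring come from this patch):

```cpp
#include <algorithm>
#include <string>
#include "openvino/runtime/core.hpp"
#include "openvino/runtime/intel_npu/properties.hpp"

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder model
    auto props = core.get_property("NPU", ov::supported_properties);
    // NPU_TURBO is only advertised when the driver's command-queue extension is present;
    // otherwise compilation or infer-request creation throws
    // "Turbo is not supported by the current driver".
    const std::string turbo_name = ov::intel_npu::turbo.name();
    bool turbo_supported = std::find(props.begin(), props.end(), turbo_name) != props.end();
    ov::AnyMap config;
    if (turbo_supported) {
        config[turbo_name] = true;
    }
    auto compiled = core.compile_model(model, "NPU", config);
    return 0;
}
```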
diff --git a/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp b/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp
index 52091a4743fb75..d35f0b393c02e5 100644
--- a/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp
+++ b/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp
@@ -33,7 +33,7 @@ inline std::shared_ptr<ov::Model> getConstantGraph(element::Type type) {
     return std::make_shared<ov::Model>(results, params);
 }

-inline bool isWorkloadTypeSupported() {
+inline bool isCommandQueueExtSupported() {
     return std::make_shared<::intel_npu::ZeroInitStructsHolder>()->getCommandQueueDdiTable() != nullptr;
 }

@@ -100,7 +100,7 @@ TEST_P(OVCompileAndInferRequest, PluginWorkloadType) {
         return property == workload_type.name();
     });

-    if (isWorkloadTypeSupported()) {
+    if (isCommandQueueExtSupported()) {
         ASSERT_TRUE(workloadTypeSupported);
         ov::InferRequest req;
         OV_ASSERT_NO_THROW(execNet = core->compile_model(function, target_device, configuration));
@@ -137,7 +137,7 @@ TEST_P(OVCompileAndInferRequest, CompiledModelWorkloadType) {
         return property == workload_type.name();
     });

-    if (isWorkloadTypeSupported()) {
+    if (isCommandQueueExtSupported()) {
         ASSERT_TRUE(workloadTypeSupported);
         OV_ASSERT_NO_THROW(execNet.set_property(modelConfiguration));
         ov::InferRequest req;
@@ -165,7 +165,7 @@ TEST_P(OVCompileAndInferRequest, CompiledModelWorkloadTypeDelayedExecutor) {
     modelConfiguration[workload_type.name()] = WorkloadType::DEFAULT;
     OV_ASSERT_NO_THROW(execNet.set_property(modelConfiguration));

-    if (isWorkloadTypeSupported()) {
+    if (isCommandQueueExtSupported()) {
         ov::InferRequest req;
         OV_ASSERT_NO_THROW(req = execNet.create_infer_request());
         bool is_called = false;
@@ -183,6 +183,47 @@ TEST_P(OVCompileAndInferRequest, CompiledModelWorkloadTypeDelayedExecutor) {
     }
 }

+using OVCompileAndInferRequestTurbo = OVCompileAndInferRequest;
+
+TEST_P(OVCompileAndInferRequestTurbo, CompiledModelTurbo) {
+    configuration[intel_npu::turbo.name()] = true;
+
+    auto supportedProperties = core->get_property("NPU", supported_properties.name()).as<std::vector<PropertyName>>();
+    bool isTurboSupported =
+        std::any_of(supportedProperties.begin(), supportedProperties.end(), [](const PropertyName& property) {
+            return property == intel_npu::turbo.name();
+        });
+
+    if (isCommandQueueExtSupported()) {
+        ASSERT_TRUE(isTurboSupported);
+        OV_ASSERT_NO_THROW(execNet = core->compile_model(function, target_device, configuration));
+        auto turbosetting_compiled_model = execNet.get_property(intel_npu::turbo.name());
+        OV_ASSERT_NO_THROW(turbosetting_compiled_model = true);
+        ov::InferRequest req;
+        OV_ASSERT_NO_THROW(req = execNet.create_infer_request());
+        bool is_called = false;
+        OV_ASSERT_NO_THROW(req.set_callback([&](std::exception_ptr exception_ptr) {
+            ASSERT_EQ(exception_ptr, nullptr);
+            is_called = true;
+        }));
+        OV_ASSERT_NO_THROW(req.start_async());
+        OV_ASSERT_NO_THROW(req.wait());
+        ASSERT_TRUE(is_called);
+    } else {
+        auto cr_ex = configuration.find(intel_npu::create_executor.name());
+        if (cr_ex->second.as<int64_t>() == 1) {
+            OV_EXPECT_THROW_HAS_SUBSTRING(core->compile_model(function, target_device, configuration),
+                                          ov::Exception,
+                                          "Turbo is not supported by the current driver");
+        } else {
+            OV_ASSERT_NO_THROW(execNet = core->compile_model(function, target_device, configuration));
+            OV_EXPECT_THROW_HAS_SUBSTRING(execNet.create_infer_request(),
+                                          ov::Exception,
+                                          "Turbo is not supported by the current driver");
+        }
+    }
+}
+
 }  // namespace behavior
 }  // namespace test
 }  // namespace ov

diff --git a/src/plugins/intel_npu/tests/functional/shared_tests_instances/behavior/ov_infer_request/compile_and_infer.cpp b/src/plugins/intel_npu/tests/functional/shared_tests_instances/behavior/ov_infer_request/compile_and_infer.cpp
deleted file mode 100644
index eb7cf591e328a0..00000000000000
--- a/src/plugins/intel_npu/tests/functional/shared_tests_instances/behavior/ov_infer_request/compile_and_infer.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright (C) 2018-2024 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "overload/compile_and_infer.hpp"
-#include <vector>
-#include "common/utils.hpp"
-#include "common/npu_test_env_cfg.hpp"
-
-namespace {
-
-using namespace ov::test::behavior;
-
-const std::vector<ov::AnyMap> configs = {{}};
-
-INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTests, OVCompileAndInferRequest,
-                         ::testing::Combine(::testing::Values(getConstantGraph(ov::element::f32)),
-                                            ::testing::Values(ov::test::utils::DEVICE_NPU),
-                                            ::testing::ValuesIn(configs)),
-                         ov::test::utils::appendPlatformTypeTestName<OVCompileAndInferRequest>);
-
-}  // namespace

diff --git a/src/plugins/intel_npu/thirdparty/level-zero-ext b/src/plugins/intel_npu/thirdparty/level-zero-ext
index 518d64125521cd..16c85231a82ee1 160000
--- a/src/plugins/intel_npu/thirdparty/level-zero-ext
+++ b/src/plugins/intel_npu/thirdparty/level-zero-ext
@@ -1 +1 @@
-Subproject commit 518d64125521cd0f8c98d65f9a0fb40013e95d15
+Subproject commit 16c85231a82ee1a0b06ed7ab7da3f411a0878ed7