
Commit ac9c135

preetha-intel, jatinwadhwa921, ankitm3k, sfatimar, and saurabhkale17 authored
Ovep develop 1.21 (microsoft#22824)
### Description
OVEP development changes for the ORT 1.21 release.

### Motivation and Context
- Has critical bug fixes
- Support for concurrent execution of models is enabled
- Support for OV 2024.5
- Memory optimizations for the NPU platform

Co-authored-by: jatinwadhwa921 <jatin.wadhwa@intel.com>
Co-authored-by: Ankit Maheshkar <ankit.maheshkar@intel.com>
Co-authored-by: sfatimar <sahar.fatima@intel.com>
Co-authored-by: saurabhkale17 <saurabh1.kale@intel.com>
Co-authored-by: TejalKhade28 <tejal.khade@intel.com>
Co-authored-by: Javier E. Martinez <javier.e.martinez@intel.com>
1 parent 632a36a commit ac9c135

File tree: 10 files changed (+77, -40 lines)


cmake/onnxruntime_providers_openvino.cmake

Lines changed: 13 additions & 8 deletions
@@ -11,22 +11,22 @@
   "${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.cc"
 )
 
-if (WIN32)
-  set(CMAKE_MAP_IMPORTED_CONFIG_RELWITHDEBINFO Release)
-endif()
-
 # Header paths
 find_package(OpenVINO REQUIRED COMPONENTS Runtime ONNX)
-if(OpenVINO_VERSION VERSION_LESS 2024.0)
-  message(FATAL_ERROR "OpenVINO 2024.0 and newer are supported. Please, use latest OpenVINO release")
+if(OpenVINO_VERSION VERSION_LESS 2024.3)
+  message(FATAL_ERROR "OpenVINO 2024.3 and newer are supported. Please, use latest OpenVINO release")
 endif()
 
 if(OpenVINO_VERSION VERSION_GREATER_EQUAL 2024.4)
   add_definitions(-DUSE_OVEP_NPU_MEMORY=1)
 endif()
 
-if (WIN32)
-  unset(CMAKE_MAP_IMPORTED_CONFIG_RELWITHDEBINFO)
+# If building RelWithDebInfo and OV package does not have that configuration map to Release
+get_target_property(ov_rt_implib_rwdi openvino::runtime IMPORTED_IMPLIB_RELWITHDEBINFO)
+if ((CMAKE_BUILD_TYPE STREQUAL RelWithDebInfo) AND NOT ov_rt_implib_rwdi)
+  set_target_properties(openvino::runtime PROPERTIES
+    MAP_IMPORTED_CONFIG_RELWITHDEBINFO Release
+  )
 endif()
 
 list(APPEND OPENVINO_LIB_LIST openvino::frontend::onnx openvino::runtime ${PYTHON_LIBRARIES})
@@ -82,3 +82,8 @@
           LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
           RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
 endif()
+
+set_target_properties(onnxruntime_providers_openvino PROPERTIES
+  MAP_IMPORTED_CONFIG_RELEASE RelWithDebInfo
+  MAP_IMPORTED_CONFIG_DEBUG RelWithDebInfo
+)

include/onnxruntime/core/session/onnxruntime_c_api.h

Lines changed: 8 additions & 3 deletions
@@ -626,8 +626,13 @@ typedef struct OrtMIGraphXProviderOptions {
 } OrtMIGraphXProviderOptions;
 
 /** \brief OpenVINO Provider Options
- *
- * \see OrtApi::SessionOptionsAppendExecutionProvider_OpenVINO
+ * \brief This Struct is frozen since ORT 1.13.0. Its maintained part of Legacy API for compatibility.
+ * \brief For latest OpenVINO Provider Options update to the ProviderOptions map.
+ * \brief Latest OpenVINO Provider Options are listed in the
+ * \htmlonly
+ * <a href="https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#summary-of-options">onnxruntime document.</a>
+ * \endhtmlonly
+ * \see OrtApi::SessionOptionsAppendExecutionProvider()
  */
 typedef struct OrtOpenVINOProviderOptions {
 #ifdef __cplusplus
@@ -645,7 +650,7 @@ typedef struct OrtOpenVINOProviderOptions {
   * Valid settings are one of: "CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16"
   */
   const char* device_type;
-  unsigned char enable_npu_fast_compile;
+  unsigned char enable_npu_fast_compile;  ///< 0 = disabled, nonzero = enabled
   const char* device_id;
   size_t num_of_threads;  ///< 0 = Use default number of threads
   const char* cache_dir;  // path is set to empty by default
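
The updated comment steers users away from the frozen OrtOpenVINOProviderOptions struct and toward the string-based provider-options map. Below is a minimal sketch of that path through the C++ wrapper API, assuming the generic AppendExecutionProvider entry point accepts the "OpenVINO" provider name as the new \see reference suggests; the option keys and values are illustrative examples and "model.onnx" is a placeholder path.

```cpp
// Sketch only: append the OpenVINO EP via the provider-options map rather than
// the legacy frozen struct. Keys follow the EP's "summary of options" page.
#include <onnxruntime_cxx_api.h>

#include <string>
#include <unordered_map>

int main() {
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "ovep-example");
  Ort::SessionOptions session_options;

  std::unordered_map<std::string, std::string> ov_options{
      {"device_type", "CPU"},      // e.g. "CPU", "GPU", "NPU"
      {"num_of_threads", "4"},     // also sizes the infer-request pool (see basic_backend.cc below)
      {"cache_dir", "ov_cache"}};  // illustrative values only
  session_options.AppendExecutionProvider("OpenVINO", ov_options);

  // "model.onnx" is a placeholder model path.
  Ort::Session session(env, ORT_TSTR("model.onnx"), session_options);
  return 0;
}
```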

onnxruntime/core/providers/openvino/backends/basic_backend.cc

Lines changed: 2 additions & 3 deletions
@@ -120,8 +120,8 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
   } catch (const char* msg) {
     ORT_THROW(msg);
   }
-
-  inferRequestsQueue_ = std::unique_ptr<InferRequestsQueue>(new InferRequestsQueue(exe_network_, 1));
+  int num_infer_req = (global_context_.num_of_threads > 0) ? global_context_.num_of_threads : 1;
+  inferRequestsQueue_ = std::unique_ptr<InferRequestsQueue>(new InferRequestsQueue(exe_network_, num_infer_req));
 }
 
 bool BasicBackend::ValidateSubgraph(std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map) {
@@ -663,7 +663,6 @@ void BasicBackend::Infer(OrtKernelContext* ctx) {
   // Requesting for an idle infer_request from a pool of infer_requests_
   OVInferRequestPtr infer_request;
   infer_request = inferRequestsQueue_->getIdleRequest();
-
 #ifdef IO_BUFFER_ENABLED
   if ((global_context_.device_type.find("GPU") != std::string::npos) &&
       (global_context_.context != nullptr) && global_context_.is_wholly_supported_graph) {
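
Sizing the pool from the num_of_threads provider option is what allows several Run() calls on the same session to execute concurrently instead of serializing on a single infer request. The sketch below illustrates the blocking idle-request-pool idea behind InferRequestsQueue::getIdleRequest(); it is a simplified stand-in (the IdleRequestPool name and shape are made up for illustration), not the OVEP class itself.

```cpp
// Minimal sketch of a blocking pool of reusable requests: N pooled requests
// allow up to N inferences to be in flight at the same time.
#include <condition_variable>
#include <mutex>
#include <queue>
#include <utility>
#include <vector>

template <typename Request>
class IdleRequestPool {
 public:
  explicit IdleRequestPool(std::vector<Request> requests) {
    for (auto& r : requests) idle_.push(std::move(r));
  }

  // Blocks until a request is free; concurrent callers are only serialized
  // once the pool runs out of idle requests.
  Request GetIdle() {
    std::unique_lock<std::mutex> lock(mu_);
    cv_.wait(lock, [this] { return !idle_.empty(); });
    Request r = std::move(idle_.front());
    idle_.pop();
    return r;
  }

  // Returns a finished request to the pool and wakes one waiting caller.
  void PutIdle(Request r) {
    {
      std::lock_guard<std::mutex> lock(mu_);
      idle_.push(std::move(r));
    }
    cv_.notify_one();
  }

 private:
  std::mutex mu_;
  std::condition_variable cv_;
  std::queue<Request> idle_;
};
```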

onnxruntime/core/providers/openvino/openvino_execution_provider.cc

Lines changed: 19 additions & 9 deletions
@@ -3,6 +3,8 @@
 #include <filesystem>
 #include <utility>
 #include <string>
+#include <memory>
+#include <vector>
 #include "core/providers/shared_library/provider_api.h"
 #include "core/providers/openvino/openvino_execution_provider.h"
 #include "core/providers/openvino/contexts.h"
@@ -187,15 +189,23 @@ common::Status OpenVINOExecutionProvider::Compile(
 
 #ifdef USE_OVEP_NPU_MEMORY
 std::vector<AllocatorPtr> OpenVINOExecutionProvider::CreatePreferredAllocators() {
-  AllocatorCreationInfo npu_allocator_info{
-      [this](OrtDevice::DeviceId device_id) {
-        return std::make_unique<OVRTAllocator>(global_context_->ie_core.Get(), OrtDevice::NPU, device_id, OpenVINO_RT_NPU);
-      },
-      0,
-  };
-
-  // fill in allocator
-  return std::vector<AllocatorPtr>{CreateAllocator(npu_allocator_info)};
+  if (global_context_->device_type.find("NPU") != std::string::npos) {
+    AllocatorCreationInfo npu_allocator_info{
+        [this](OrtDevice::DeviceId device_id) {
+          return std::make_unique<OVRTAllocator>(
+              global_context_->ie_core.Get(),
+              OrtDevice::NPU,
+              device_id,
+              OpenVINO_RT_NPU);
+        },
+        0,
+    };
+
+    // fill in allocator
+    return std::vector<AllocatorPtr>{CreateAllocator(npu_allocator_info)};
+  } else {
+    return std::vector<AllocatorPtr>{};
+  }
 }
 #endif

onnxruntime/core/providers/openvino/openvino_execution_provider.h

Lines changed: 1 addition & 1 deletion
@@ -199,8 +199,8 @@ class OpenVINOExecutionProvider : public IExecutionProvider {
 #endif
  private:
   std::unique_ptr<openvino_ep::GlobalContext> global_context_;
-  openvino_ep::EPCtxHandler ep_ctx_handle_{};
   std::shared_ptr<openvino_ep::BackendManager> backend_manager_;
+  openvino_ep::EPCtxHandler ep_ctx_handle_{};
 };
 
 }  // namespace onnxruntime

onnxruntime/core/providers/openvino/ov_versions/capability.cc

Lines changed: 4 additions & 6 deletions
@@ -35,16 +35,14 @@ GetCapability::GetCapability(const GraphViewer& graph_viewer_param,
     device_type_ = "CPU";
     if (enable_qdq_optimizer) npu_qdq_optimizer_enabled = true;
   }
-#if OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 1
-  data_ops_ = new DataOps(graph_viewer_, V_2024_1, device_type_, npu_qdq_optimizer_enabled);
-#elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 2
-  data_ops_ = new DataOps(graph_viewer_, V_2024_2, device_type_, npu_qdq_optimizer_enabled);
-#elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 3
+#if OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 3
   data_ops_ = new DataOps(graph_viewer_, V_2024_3, device_type_, npu_qdq_optimizer_enabled);
 #elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 4
   data_ops_ = new DataOps(graph_viewer_, V_2024_4, device_type_, npu_qdq_optimizer_enabled);
+#elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 5
+  data_ops_ = new DataOps(graph_viewer_, V_2024_5, device_type_, npu_qdq_optimizer_enabled);
 #else
-  data_ops_ = new DataOps(graph_viewer_, V_2024_4, device_type_, npu_qdq_optimizer_enabled);
+  data_ops_ = new DataOps(graph_viewer_, V_2024_5, device_type_, npu_qdq_optimizer_enabled);
 #endif
 }

onnxruntime/core/providers/openvino/ov_versions/data_ops.cc

Lines changed: 23 additions & 4 deletions
@@ -118,6 +118,7 @@ std::vector<SupportedOp> supported_op_mode = {
     {"CumSum", V_2022_1, {"CPU", "GPU"}},
     {"DepthToSpace", V_2020_4, {"CPU", "GPU"}},
     {"DequantizeLinear", V_2021_4, {"CPU", "GPU"}},
+    {"DequantizeLinear", V_2024_4, {"NPU"}},
     {"Div", V_2020_4, {"CPU", "GPU"}},
     {"Dropout", V_2020_4, {"CPU", "GPU"}},
     {"Elu", V_2020_4, {"CPU", "GPU"}},
@@ -254,6 +255,8 @@ void DataOps::populate_types_supported() {
       std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
   supported_types_initializer_.insert(
       std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
+  supported_types_initializer_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16));
   supported_types_initializer_.insert(
       std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT16));
   supported_types_initializer_.insert(
@@ -262,6 +265,10 @@ void DataOps::populate_types_supported() {
       std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
   supported_types_initializer_.insert(
       std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
+  supported_types_initializer_.insert(
+      std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT4));
+  supported_types_initializer_.insert(
+      std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT4));
 
   supported_types_npu_.insert(
       std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
@@ -285,6 +292,10 @@ void DataOps::populate_types_supported() {
       std::make_pair(V_2024_3, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN));
   supported_types_npu_.insert(
       std::make_pair(V_2024_3, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FNUZ));
+  supported_types_npu_.insert(
+      std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT4));
+  supported_types_npu_.insert(
+      std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT4));
 
   supported_types_cpu_.insert(
       std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
@@ -304,6 +315,10 @@ void DataOps::populate_types_supported() {
       std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
   supported_types_cpu_.insert(
      std::make_pair(V_2022_2, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
+  supported_types_cpu_.insert(
+      std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT4));
+  supported_types_cpu_.insert(
+      std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT4));
 
   supported_types_gpu_.insert(
       std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
@@ -319,6 +334,10 @@ void DataOps::populate_types_supported() {
       std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
   supported_types_gpu_.insert(
       std::make_pair(V_2022_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
+  supported_types_gpu_.insert(
+      std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT4));
+  supported_types_gpu_.insert(
+      std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT4));
 }
 
 void DataOps::populate_op_mode_supported() {
@@ -368,7 +387,7 @@ void DataOps::populate_op_mode_supported() {
 
   // populate unsupportedmode_t
   {
-    UnsupportedOpMode obj = {{V_2024_1, V_2024_2, V_2024_3, V_2024_4},
+    UnsupportedOpMode obj = {{V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5},
                              [this](const Node* node, const InitializedTensorSet&) {
                                // If the Input of ReduceMax op is UINT8, it is rejected (Due to output mismatch)
                                for (size_t i = 0; i < node->InputDefs().size(); i++) {
@@ -383,7 +402,7 @@ void DataOps::populate_op_mode_supported() {
     op_list_.insert({"ReduceMax", obj});
   }
   {
-    UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4},
+    UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5},
                              [this](const Node* node, const InitializedTensorSet&) {
                                const auto& input_arg = node->InputDefs()[1];
                                auto shape = input_arg->Shape();
@@ -400,7 +419,7 @@ void DataOps::populate_op_mode_supported() {
     op_list_.insert({"Reshape", obj});
   }
   {
-    UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4},
+    UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5},
                              [this](const Node* node, const InitializedTensorSet&) {
                                // If the operator is unsqueeze
                                // If axes is an input, then we cannot produce a static graph.
@@ -415,7 +434,7 @@ void DataOps::populate_op_mode_supported() {
     op_list_.insert({"Unsqueeze", obj});
   }
   {
-    UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4},
+    UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5},
                              [this](const Node* node, const InitializedTensorSet&) {
                                // check for attributes
                                auto& upsample_attr = node->GetAttributes();
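
The entries added above register INT16, INT4, and UINT4 against the OpenVINO release that introduced support for them, and each UnsupportedOpMode version list gains V_2024_5. As a rough illustration of how a (version, type) table of this kind can gate support, here is a hedged sketch; the real DataOps lookup may differ in detail, and IsTypeSupported is a made-up helper name.

```cpp
// Illustrative sketch of a version-gated type-support table, in the spirit of
// the supported_types_* sets above. Not the literal DataOps implementation.
#include <set>
#include <utility>

enum VersionNum { V_2020_4, V_2021_4, V_2022_1, V_2022_2, V_2024_3, V_2024_4, V_2024_5 };

// Each entry pairs the first release that supports an ONNX tensor element type
// (represented here by its TensorProto_DataType integer value).
using TypeSupportTable = std::set<std::pair<VersionNum, int>>;

// A type is accepted once the table lists it at or below the active release.
inline bool IsTypeSupported(const TypeSupportTable& table, VersionNum active, int onnx_type) {
  for (const auto& entry : table) {
    if (entry.second == onnx_type && entry.first <= active) return true;
  }
  return false;
}

// Example: the INT4/UINT4 entries above are registered at V_2024_4, so they are
// accepted when the active version is V_2024_4 or V_2024_5, but not earlier.
```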

onnxruntime/core/providers/openvino/ov_versions/data_ops.h

Lines changed: 2 additions & 1 deletion
@@ -31,7 +31,8 @@ enum versionNum {
   V_2024_1,
   V_2024_2,
   V_2024_3,
-  V_2024_4
+  V_2024_4,
+  V_2024_5
 };
 
 using VersionNum = enum versionNum;

tools/ci_build/github/azure-pipelines/linux-openvino-ci-pipeline.yml

Lines changed: 1 addition & 1 deletion
@@ -33,5 +33,5 @@ jobs:
     parameters:
       AgentPool : 'Linux-CPU-2019'
       JobName: 'Linux_CI_Dev'
-      RunDockerBuildArgs: '-o ubuntu22.04 -p 3.10 -d openvino -v 2024.3.0 -x "--use_openvino CPU --build_wheel"'
+      RunDockerBuildArgs: '-o ubuntu22.04 -p 3.10 -d openvino -v 2024.4.0 -x "--use_openvino CPU --build_wheel"'
       TimeoutInMinutes: 120

tools/ci_build/github/linux/docker/Dockerfile.ubuntu_openvino

Lines changed: 4 additions & 4 deletions
@@ -1,7 +1,7 @@
 ARG UBUNTU_VERSION=22.04
 FROM ubuntu:${UBUNTU_VERSION}
 
-ARG OPENVINO_VERSION=2024.3.0
+ARG OPENVINO_VERSION=2024.4.0
 ARG PYTHON_VERSION=3.10
 
 ADD scripts /tmp/scripts
@@ -19,9 +19,9 @@ ENV IE_PLUGINS_PATH=$INTEL_OPENVINO_DIR/runtime/lib/intel64
 ENV DEBIAN_FRONTEND=noninteractive
 
 RUN cd /opt && mkdir -p intel && cd intel && \
-    wget https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.3/linux/l_openvino_toolkit_ubuntu22_2024.3.0.16041.1e3b88e4e3f_x86_64.tgz && \
-    tar xzf l_openvino_toolkit_ubuntu22_2024.3.0.16041.1e3b88e4e3f_x86_64.tgz && rm -rf l_openvino_toolkit_ubuntu22_2024.3.0.16041.1e3b88e4e3f_x86_64.tgz && \
-    mv l_openvino_toolkit_ubuntu22_2024.3.0.16041.1e3b88e4e3f_x86_64 openvino_2024.3.0 && \
+    wget https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.4/linux/l_openvino_toolkit_ubuntu22_2024.4.0.16579.c3152d32c9c_x86_64.tgz && \
+    tar xzf l_openvino_toolkit_ubuntu22_2024.4.0.16579.c3152d32c9c_x86_64.tgz && rm -rf l_openvino_toolkit_ubuntu22_2024.4.0.16579.c3152d32c9c_x86_64.tgz && \
+    mv l_openvino_toolkit_ubuntu22_2024.4.0.16579.c3152d32c9c_x86_64 openvino_2024.4.0 && \
     cd $INTEL_OPENVINO_DIR/install_dependencies && ./install_openvino_dependencies.sh -y
 
 WORKDIR /root
