Skip to content

feat: Enable EpContext OVIR Encapsulation #704

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jun 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions onnxruntime/core/providers/openvino/backend_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ BackendManager::BackendManager(SessionContext& session_context,
session_context_(session_context),
shared_context_{shared_context} {
subgraph_context_.is_ep_ctx_graph = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(subgraph);
// If the graph contains an OVIR-wrapped node, check whether it has a matching XML file name attribute
subgraph_context_.is_ep_ctx_ovir_encapsulated = ep_ctx_handle_.CheckEPCacheContextAttribute(subgraph,
session_context_.onnx_model_path_name.filename().replace_extension("xml").string());

subgraph_context_.model_precision = [&](const GraphViewer& graph_viewer) {
// return empty if graph has no inputs or if types are not one of FP32/FP16
Expand Down Expand Up @@ -192,9 +195,10 @@ BackendManager::BackendManager(SessionContext& session_context,
}
}
}
if (session_context_.so_context_enable && !subgraph_context_.is_ep_ctx_graph) {
if (session_context_.so_context_enable &&
(subgraph_context_.is_ep_ctx_ovir_encapsulated || !subgraph_context_.is_ep_ctx_graph)) {
auto status = onnxruntime::openvino_ep::BackendManager::ExportCompiledBlobAsEPCtxNode(subgraph);
if ((!status.IsOK())) {
if (!status.IsOK()) {
ORT_THROW(status);
}
}
Expand Down
27 changes: 27 additions & 0 deletions onnxruntime/core/providers/openvino/backend_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,33 @@
metadata_map.clear();
}

// Reports whether the data at the stream's current read position looks like an XML model,
// i.e. its header starts with "<?xml" and also contains "<net ".
//
// @param model_stream: Seekable input stream positioned at the start of the model data.
// @return true if the header matches; false otherwise, including when the stream is shorter
//         than the inspected header or its position cannot be queried.
//
// The read position of model_stream is restored before returning, so the caller can consume
// the stream from the same place afterwards.
bool IsModelStreamXML(std::istream& model_stream) {
  // 32 bytes are enough to hold content of:
  // '<?xml version="1.0"?> <net '
  const std::streamsize header_check_len = 32;

  const std::streampos original_pos = model_stream.tellg();
  if (original_pos == std::streampos(-1)) {
    // The position cannot be queried (stream already failed or is not seekable),
    // so there is nothing that can safely be inspected and restored.
    return false;
  }

  // Read at most header_check_len bytes; a shorter stream simply yields a shorter header
  // instead of being treated as a fatal error.
  std::string header(static_cast<std::string::size_type>(header_check_len), '\0');
  model_stream.read(&header[0], header_check_len);
  header.resize(static_cast<std::string::size_type>(model_stream.gcount()));

  // A short read sets eofbit/failbit; clear them so the seek below succeeds.
  model_stream.clear();
  // Restore the stream position
  model_stream.seekg(original_pos);

  // return true if the header starts with '<?xml' and also includes '<net '
  return (header.rfind("<?xml", 0) == 0) && (header.find("<net ") != std::string::npos);
}

} // namespace backend_utils
} // namespace openvino_ep
} // namespace onnxruntime
2 changes: 2 additions & 0 deletions onnxruntime/core/providers/openvino/backend_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ void printPerformanceCounts(const std::vector<OVProfilingInfo>& performanceMap,

void printPerformanceCounts(OVInferRequestPtr request, std::ostream& stream, std::string deviceName);

// Returns true when the data at the current read position of model_stream looks like an XML
// model: the first 32 bytes start with "<?xml" and also contain "<net ".
// The stream's read position is restored before the function returns.
bool IsModelStreamXML(std::istream& model_stream);

} // namespace backend_utils
} // namespace openvino_ep
} // namespace onnxruntime
38 changes: 32 additions & 6 deletions onnxruntime/core/providers/openvino/backends/basic_backend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -72,12 +72,38 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
!session_context_.so_disable_cpu_ep_fallback &&
!subgraph_context_.is_ep_ctx_graph);
if (subgraph_context_.is_ep_ctx_graph) {
// If the blob is held in an EPContext node, then skip FE+Compile
// and directly move on to creating a backend with the executable blob
exe_network_ = OVCore::Get()->ImportModel(*model_stream,
hw_target,
device_config,
subgraph_context_.subgraph_name);
if (subgraph_context_.is_ep_ctx_ovir_encapsulated) {
// model_file_path falls back to so_context_file_path when onnx_model_path_name is not available,
// e.g. when the session is created via CreateSessionFromArray(), in which case the user must
// explicitly specify an absolute path for so_context_file_path.
auto model_file_path = [this]() {
if (!session_context_.onnx_model_path_name.empty() &&
std::filesystem::exists(session_context_.onnx_model_path_name)) return session_context_.onnx_model_path_name;

ORT_ENFORCE(!session_context_.so_context_file_path.empty() &&
std::filesystem::path(session_context_.so_context_file_path).is_absolute() &&
std::filesystem::exists(session_context_.so_context_file_path), log_tag +
"Context file path must be non-empty & absolute, when using CreateSessionFormArray() API explicitly."
" Please set a valid absolute path for ep.context_file_path in session options.");
// Return absolute context file path as input to ImportEPCtxOVIREncapsulation() function.
return session_context_.so_context_file_path;

};
// If the EPContext node uses OVIR encapsulation, create an executable
// network from EP_CACHE_CONTEXT using read_model() & compile_model()
exe_network_ = OVCore::Get()->ImportEPCtxOVIREncapsulation(*model_stream,
hw_target,
device_config,
enable_causallm,
model_file_path());
} else {
// If the blob is held in an EPContext node, then skip FE+Compile
// and directly move on to creating a backend with the executable blob
exe_network_ = OVCore::Get()->ImportModel(*model_stream,
hw_target,
device_config,
subgraph_context_.subgraph_name);
}
model_stream.reset(); // Delete stream after it is no longer needed
} else if (!session_context_.has_external_weights &&
!subgraph_context_.has_dynamic_input_shape &&
Expand Down
1 change: 1 addition & 0 deletions onnxruntime/core/providers/openvino/contexts.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ struct SubGraphContext {
string_index_map_t output_names;
std::string model_precision;
bool is_ep_ctx_graph = false;
bool is_ep_ctx_ovir_encapsulated = false;
};

} // namespace openvino_ep
Expand Down
39 changes: 38 additions & 1 deletion onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <algorithm>

#include "core/providers/openvino/onnx_ctx_model_helper.h"
#include "core/providers/openvino/backend_utils.h"

namespace onnxruntime {
namespace openvino_ep {
Expand Down Expand Up @@ -123,6 +124,16 @@ std::unique_ptr<std::istream> EPCtxHandler::GetModelBlobStream(const std::filesy
ORT_ENFORCE(std::filesystem::exists(blob_filepath), "Blob file not found: ", blob_filepath.string());
result.reset((std::istream*)new std::ifstream(blob_filepath, std::ios_base::binary | std::ios_base::in));
}

bool isXML = backend_utils::IsModelStreamXML(*result);
if (!isXML) {
// If the model stream is not an XML (i.e. precompiled blob), the OpenVINO SDK version that it was
// exported with must match the version that is currently running.
ORT_ENFORCE((attrs.count(EP_SDK_VER) == 1) && (attrs.at(EP_SDK_VER).s() == openvino_sdk_version_),
"EPCtx blob was exported / is compatible with OpenVINO SDK version " + attrs.at(EP_SDK_VER).s() +
", but OpenVINO SDK version currently in use is " + openvino_sdk_version_);
}

LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Read blob from EPContext Node";
return result;
}
Expand All @@ -142,7 +153,6 @@ bool EPCtxHandler::CheckForOVEPCtxNode(const Node& node) const {
if (node.OpType() == EPCONTEXT_OP) {
auto& attrs = node.GetAttributes();
bool result = (attrs.count(SOURCE) == 1) && (attrs.at(SOURCE).s() == kOpenVINOExecutionProvider);
result &= (attrs.count(EP_SDK_VER) == 1) && (attrs.at(EP_SDK_VER).s() == openvino_sdk_version_);
result &= attrs.count(EMBED_MODE) == 1;
result &= attrs.count(EP_CACHE_CONTEXT) == 1;
return result;
Expand All @@ -155,5 +165,32 @@ InlinedVector<const Node*> EPCtxHandler::GetEPCtxNodes() const {
return InlinedVector<const Node*>(epctx_nodes.begin(), epctx_nodes.end());
}

// Check if graph's only node is EPContext & EP_CACHE_CONTEXT attribute has target extension.
// @param graph_viewer: The graph to inspect.
// @param target_attr_extn: The string to search for in the EP_CACHE_CONTEXT attribute.
// @return true if the node exists, is of the correct type, and the attribute contains the extension; false otherwise.
bool EPCtxHandler::CheckEPCacheContextAttribute(const GraphViewer& graph_viewer, const std::string& target_attr_extn) const {
// Only check if the graph has exactly one node
if (graph_viewer.NumberOfNodes() != 1) {
return false;
}
// Get the first node in topological order
auto first_index = *graph_viewer.GetNodesInTopologicalOrder().begin();
const Node* node = graph_viewer.GetNode(first_index);
if (!node) {
return false;
}
// Check OpType and required attributes
if (node->OpType() != EPCONTEXT_OP) {
return false;
}
const auto& attrs = node->GetAttributes();
auto it = attrs.find(EP_CACHE_CONTEXT);
if (it != attrs.end()) {
return it->second().s().find(target_attr_extn) != std::string::npos;
}
return false;
}

} // namespace openvino_ep
} // namespace onnxruntime
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ class EPCtxHandler {
std::string&& model_blob_str) const;
std::unique_ptr<std::istream> GetModelBlobStream(const std::filesystem::path& so_context_file_path, const GraphViewer& graph_viewer) const;
InlinedVector<const Node*> GetEPCtxNodes() const;
// Returns true if graph_viewer holds exactly one node, that node is an EPContext op, and its
// EP_CACHE_CONTEXT attribute string contains target_attr_extn; false in every other case.
bool CheckEPCacheContextAttribute(const GraphViewer& graph_viewer, const std::string& target_attr_extn) const;

private:
const std::string openvino_sdk_version_;
Expand Down
59 changes: 56 additions & 3 deletions onnxruntime/core/providers/openvino/ov_interface.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ void printDebugInfo(const ov::CompiledModel& obj) {
continue;
OPENVINO_SUPPRESS_DEPRECATED_END
std::cout << " " << item2.first << ": " << item2.second.as<std::string>() << std::endl;
}
}
} else {
std::cout << " " << cfg << ": " << prop.as<std::string>() << std::endl;
Expand Down Expand Up @@ -101,10 +100,10 @@ OVExeNetwork OVCore::StatefulCompileModel(std::shared_ptr<OVNetwork>& model,
LogBasicModelInfo(model);
}

LOGS_DEFAULT(INFO) << log_tag << "Converting from Stateless OV Model to Stateful OV Model" << std::endl;
bool model_status = IsStateful(model);
LOGS_DEFAULT(INFO) << log_tag << "Model IsStateful() Status:\t" << (model_status ? "True" : "False");
if (!model_status) {
LOGS_DEFAULT(INFO) << log_tag << "Converting from Stateless OV Model to Stateful OV Model" << std::endl;
PatchStatefulDecoder(model);
}

Expand Down Expand Up @@ -198,15 +197,69 @@ OVExeNetwork OVCore::ImportModel(std::istream& model_stream,
return OvExceptionBoundary([&]() {
ov::CompiledModel obj;
obj = core.import_model(model_stream, hw_target, device_config);
OVExeNetwork exe(obj, hw_target);
#ifndef NDEBUG
printDebugInfo(exe.Get());
#endif
OVExeNetwork exe(obj, hw_target);
return exe;
},
"Exception while Loading Network for graph {}", name);
}

// Creates an executable network for an EPContext node that encapsulates OVIR (an XML model)
// rather than a precompiled blob.
//
// @param model_stream: Stream whose header is inspected to confirm the payload is XML.
// @param hw_target: Device the model is compiled for.
// @param device_config: Properties forwarded to the compile call.
// @param enable_causallm: When true the model is compiled through StatefulCompileModel(),
//                         otherwise through core.compile_model().
// @param model_file_path: Path used to locate the XML file; the file read is
//                         "<parent of model_file_path>/<stem of model_file_path>.xml".
// @return The compiled executable network.
// Throws (via ORT_THROW) when the stream is not XML or the XML file is missing, empty or
// unreadable; all work runs inside OvExceptionBoundary.
OVExeNetwork OVCore::ImportEPCtxOVIREncapsulation(std::istream& model_stream,
                                                  std::string& hw_target,
                                                  const ov::AnyMap& device_config,
                                                  bool enable_causallm,
                                                  std::filesystem::path model_file_path) {
  return OvExceptionBoundary([&]() {
    // Fail fast instead of silently returning a default-constructed (empty) network, which
    // would only surface later as an obscure failure when the network is first used.
    if (!backend_utils::IsModelStreamXML(model_stream)) {
      ORT_THROW(log_tag + "EPContext OVIR encapsulation requires an XML model stream, but the stream is not XML.");
    }

    // Helper function to check if file exists and is readable
    const auto check_file_access = [](const std::filesystem::path& path) {
      try {
        if (!std::filesystem::exists(path) || std::filesystem::is_empty(path)) {
          ORT_THROW(log_tag + "Required file missing or empty: " + path.string());
        }
        std::ifstream file(path);
        if (!file) {
          ORT_THROW(log_tag + "Required file not readable: " + path.string());
        }
      } catch (const std::exception& e) {
        ORT_THROW(log_tag + "Exception while checking file access for: " + path.string() + " - " + e.what());
      }
    };

    // The model is loaded from the XML file on disk via read_model(),
    // where weights from bin file is directly consumed
    const auto xml_file_path = model_file_path.parent_path() / (model_file_path.stem().string() + ".xml");

    check_file_access(xml_file_path);

    LOGS_DEFAULT(INFO) << log_tag << "Reading OVIR from XML file path: " << xml_file_path.string();

    // Load the model explicitly with XML contents
    std::shared_ptr<ov::Model> model = core.read_model(xml_file_path.string());

    OVExeNetwork exe;
    if (enable_causallm) {
      exe = OVCore::Get()->StatefulCompileModel(model, hw_target, device_config);
    } else {
      auto obj = core.compile_model(model, hw_target, device_config);
      exe = OVExeNetwork(obj, hw_target);
    }

#ifndef NDEBUG
    printDebugInfo(exe.Get());
#endif
    return exe;
  },
                             "Exception while Loading Network from OVIR model file: {}", model_file_path.string());
}


void OVCore::SetCache(const std::string& cache_dir_path) {
core.set_property(ov::cache_dir(cache_dir_path));
}
Expand Down
6 changes: 6 additions & 0 deletions onnxruntime/core/providers/openvino/ov_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,12 @@ struct OVCore : WeakSingleton<OVCore> {
std::string hw_target,
const ov::AnyMap& device_config,
std::string name);
// Builds an executable network for an EPContext node that encapsulates OVIR (XML).
// When model_stream holds XML, the file "<stem of model_file_path>.xml" located next to
// model_file_path is read and compiled for hw_target with device_config; enable_causallm
// selects the stateful compile path (StatefulCompileModel) instead of a plain compile.
OVExeNetwork ImportEPCtxOVIREncapsulation(std::istream& model_stream,
std::string& hw_target,
const ov::AnyMap& device_config,
bool enable_causallm,
std::filesystem::path model_file_path);

std::vector<std::string> GetAvailableDevices() const;
std::vector<std::string> GetAvailableDevices(const std::string& device_type) const;
void SetCache(const std::string& cache_dir_path);
Expand Down
Loading