Skip to content

Add support for session option ep.stop_context_sharing #655

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 22 additions & 16 deletions onnxruntime/core/providers/openvino/backend_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -83,22 +83,23 @@ BackendManager::BackendManager(SessionContext& session_context,
}
std::string device_type = session_context_.device_type;

auto& sw = shared_context_.shared_weights;
if (session_context_.so_share_ep_contexts) {
std::filesystem::path weight_filename = session_context_.onnx_model_path_name.parent_path();
if (sw.external_weight_filename.empty() && !sw.metadata.empty()) {
// Reasonable assumption that all metadata entries have the same external file location
sw.external_weight_filename = sw.metadata.begin()->second.location;
}
weight_filename /= sw.external_weight_filename;
std::ifstream weight_file(weight_filename);
// Check if model is using external weights
if (auto filename = backend_utils::GetExternalWeightFilename(subgraph)) {
std::filesystem::path weights_filepath = session_context_.onnx_model_path_name.parent_path() / filename.value();

if (weight_file) {
if (!sw.mapped_weights) {
sw.mapped_weights = std::make_unique<SharedContext::SharedWeights::WeightsFile>(weight_filename);
}
backend_utils::CreateOVTensors(session_context_.device_type, sw.metadata, *sw.mapped_weights);
// Initialize external weights with fully qualified path
if (!std::filesystem::exists(weights_filepath)) {
ORT_THROW("Error: Failed to locate weight file at ", weights_filepath.string());
}

external_weights_.emplace(weights_filepath);
}

if (session_context_.so_share_ep_contexts) {
ORT_ENFORCE(external_weights_.has_value(), "Expected external weight object to be valid");
backend_utils::CreateOVTensors(session_context_.device_type,
shared_context_.shared_weights.metadata,
external_weights_.value());
}

if (ModelHasSymbolicInputDims(subgraph)) {
Expand Down Expand Up @@ -324,7 +325,7 @@ static bool IsQDQGraph(const onnxruntime::GraphViewer& graph_viewer) {
static void DumpOpenVINOEPModel([[maybe_unused]] const std::filesystem::path& onnx_model_path_name,
[[maybe_unused]] ONNX_NAMESPACE::ModelProto* model_proto,
[[maybe_unused]] const onnxruntime::Node& fused_node) {
#ifndef RELEASE
#ifdef NOT_RELEASE
if (openvino_ep::backend_utils::IsDebugEnabled()) {
auto model_name = onnx_model_path_name.empty() ? "unknown.onnx" : onnx_model_path_name.filename();

Expand Down Expand Up @@ -384,7 +385,12 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
if (session_context_.device_type.find("NPU") != std::string::npos &&
(enable_ovep_qdq_optimizer || session_context_.so_share_ep_contexts)) {
std::unique_ptr<onnxruntime::Model> model;
Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, session_context_.so_share_ep_contexts, enable_ovep_qdq_optimizer, model, shared_context_.shared_weights);
Status status = CreateModelWithStrippedQDQNodes(subgraph,
logger,
session_context_.so_share_ep_contexts,
enable_ovep_qdq_optimizer,
model,
shared_context_.shared_weights.metadata);
auto model_proto = model->ToProto();
model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
print_model_proto_duration();
Expand Down
1 change: 1 addition & 0 deletions onnxruntime/core/providers/openvino/backend_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class BackendManager {
EPCtxHandler& ep_ctx_handle_;
SessionContext& session_context_;
SharedContext& shared_context_;
std::optional<fs::path> external_weights_;
};

} // namespace openvino_ep
Expand Down
90 changes: 66 additions & 24 deletions onnxruntime/core/providers/openvino/backend_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <sstream>
#include <fstream>
#include <utility>
#include <string>

#include <filesystem>
#include <stdexcept>
Expand All @@ -20,22 +21,7 @@ using Exception = ov::Exception;
namespace onnxruntime {
namespace openvino_ep {

// Opens the weights file at `filename` for binary reading and records its size in bytes.
// Stream exceptions for failbit/badbit are switched on for file_ and stay enabled afterwards.
// Any stream failure raised while doing so is reported through ORT_THROW together with the path.
SharedContext::SharedWeights::WeightsFile::WeightsFile(std::filesystem::path filename)
    : file_(filename, std::ios::in | std::ios::binary) {
  try {
    // Turning exceptions on raises immediately if the open above already left failbit set.
    file_.exceptions(std::ifstream::failbit | std::ifstream::badbit);

    // Position at the end of the file; the resulting offset is the file size.
    file_.seekg(0, std::ios::end);
    weights_size_ = file_.tellg();
  } catch (const std::ifstream::failure& stream_error) {
    ORT_THROW("Error: Failed to open weight file at ", filename.string(), " ", stream_error.what());
  }
}

// Copies `size` bytes, starting at byte `file_offset` of the weights file, into the buffer at `data`.
// The requested window [file_offset, file_offset + size) must lie inside the file, otherwise
// ORT_ENFORCE fails. The caller provides a destination buffer of at least `size` bytes.
void SharedContext::SharedWeights::WeightsFile::load_weights(size_t file_offset, void* data, size_t size) {
  // `size <= weights_size_` is tested before the subtraction so `weights_size_ - size` cannot wrap.
  ORT_ENFORCE(file_offset < weights_size_ && size <= weights_size_ && (file_offset <= weights_size_ - size), "Error: File offset is out of bounds.");

  auto* const destination = static_cast<char*>(data);
  file_.seekg(file_offset);
  file_.read(destination, size);
}

std::ostream& operator<<(std::ostream& stream, const SharedContext::SharedWeights::Metadata::Map& metadata) {
std::ostream& operator<<(std::ostream& stream, const Metadata::Map& metadata) {
try {
stream << metadata.size();

Expand Down Expand Up @@ -69,14 +55,14 @@ std::ostream& operator<<(std::ostream& stream, const SharedContext::SharedWeight
return stream;
}

std::istream& operator>>(std::istream& stream, SharedContext::SharedWeights::Metadata::Map& metadata) {
std::istream& operator>>(std::istream& stream, Metadata::Map& metadata) {
size_t map_size{0};
try {
stream >> map_size;

while (!stream.eof()) {
SharedContext::SharedWeights::Metadata::Key key;
SharedContext::SharedWeights::Metadata::Value value;
Metadata::Key key;
Metadata::Value value;
stream >> key.name;
stream >> value.location;
stream >> value.data_offset;
Expand Down Expand Up @@ -399,8 +385,19 @@ ov::element::Type GetOpenVINOElementType(ONNX_NAMESPACE::TensorProto_DataType dt

// Function to handle tensor creation from external data
void CreateOVTensors(const std::string& device_name,
SharedContext::SharedWeights::Metadata::Map& metadata_map,
SharedContext::SharedWeights::WeightsFile& weights) {
Metadata::Map& metadata_map,
std::filesystem::path& weights_filepath) {
// File is guaranteed to exist at this point
std::ifstream file(weights_filepath, std::ios::in | std::ios::binary);
file.exceptions(std::ifstream::failbit | std::ifstream::badbit);
size_t weights_size = std::filesystem::file_size(weights_filepath);

const auto load_weights = [&file, weights_size](size_t file_offset, void* data, size_t size) {
ORT_ENFORCE(file_offset < weights_size && size <= weights_size && (file_offset <= weights_size - size), "Error: File offset is out of bounds.");
file.seekg(file_offset);
file.read(reinterpret_cast<char*>(data), size);
};

for (auto& [key, value] : metadata_map) {
if (value.tensor) continue;

Expand All @@ -416,18 +413,18 @@ void CreateOVTensors(const std::string& device_name,
auto&& remote_tensor = npu_context.create_l0_host_tensor(ov_elementType, value.dimensions, ov::intel_npu::TensorType::INPUT);

// Copy data to remote tensor
weights.load_weights(value.data_offset, remote_tensor.get(), value.size);
load_weights(value.data_offset, remote_tensor.get(), value.size);
value.tensor = std::make_shared<ov::Tensor>(remote_tensor);
} else {
// Use vanilla tensors
value.tensor = std::make_shared<ov::Tensor>(ov_elementType, value.dimensions);
weights.load_weights(value.data_offset, value.tensor->data(), value.size);
load_weights(value.data_offset, value.tensor->data(), value.size);
}
ORT_ENFORCE(value.tensor->get_byte_size() == value.size, "Unexpected tensor size mismatch");
}
}

void DestroyOVTensors(SharedContext::SharedWeights::Metadata::Map& metadata_map) {
void DestroyOVTensors(Metadata::Map& metadata_map) {
for (auto& [key, value] : metadata_map) {
if (value.tensor) {
value.tensor.reset();
Expand All @@ -436,6 +433,51 @@ void DestroyOVTensors(SharedContext::SharedWeights::Metadata::Map& metadata_map)
metadata_map.clear();
}

// Searches the graph for an initializer whose data is stored externally and returns the value of
// its "location" external-data entry. The graph's initialized tensors are checked first, then the
// constant initializers (including outer scope) referenced by node inputs in topological order.
// Returns std::nullopt when no initializer carries such an entry.
std::optional<std::string> GetExternalWeightFilename(const GraphViewer& graph) {
  // Yields the "location" entry of `tensor` when it is flagged as externally stored.
  const auto find_location = [](const ONNX_NAMESPACE::TensorProto& tensor) -> std::optional<std::string> {
    // The external-data entries are reached through the mutable accessor, which requires
    // casting away const on the tensor.
    auto* entries = const_cast<ONNX_NAMESPACE::TensorProto&>(tensor).mutable_external_data();

    const bool stored_externally =
        tensor.has_data_location() &&
        tensor.data_location() == ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL;
    if (!stored_externally) {
      return std::nullopt;
    }

    for (int idx = 0; idx < entries->size(); idx++) {
      auto& entry = entries->at(idx);
      const auto& entry_key = *entry.mutable_key();
      const auto& entry_value = *entry.mutable_value();
      if (entry_key == "location") {
        return std::optional<std::string>{entry_value};
      }
    }

    return std::nullopt;
  };

  // Pass 1: constant initializers owned by the graph.
  for (const auto& name_and_tensor : graph.GetAllInitializedTensors()) {
    auto location = find_location(*name_and_tensor.second);
    if (location) {
      return location;
    }
  }

  // Pass 2: outer-scope constant initializers referenced by node inputs.
  for (auto& node_index : graph.GetNodesInTopologicalOrder()) {
    const auto* node = graph.GetNode(node_index);
    for (const auto& input_def : node->InputDefs()) {
      if (!graph.IsConstantInitializer(input_def->Name(), true)) {
        continue;
      }

      const auto& outer_tensor = *graph.GetConstantInitializer(input_def->Name(), true);
      auto location = find_location(outer_tensor);
      if (location) {
        return location;
      }
    }
  }

  return std::nullopt;
}

} // namespace backend_utils
} // namespace openvino_ep
} // namespace onnxruntime
9 changes: 6 additions & 3 deletions onnxruntime/core/providers/openvino/backend_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,15 +67,18 @@ CreateOVModel(std::string&& model,
std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);

void CreateOVTensors(const std::string& device_name,
SharedContext::SharedWeights::Metadata::Map& metadata_map,
SharedContext::SharedWeights::WeightsFile& weights);
void DestroyOVTensors(SharedContext::SharedWeights::Metadata::Map& metadata_map);
Metadata::Map& metadata_map,
std::filesystem::path& weights_filepath);
void DestroyOVTensors(Metadata::Map& metadata_map);

void printPerformanceCounts(const std::vector<OVProfilingInfo>& performanceMap,
std::ostream& stream, std::string deviceName);

void printPerformanceCounts(OVInferRequestPtr request, std::ostream& stream, std::string deviceName);

// Returns the location string of the first external initializer found, or nullopt if none is found
std::optional<std::string> GetExternalWeightFilename(const GraphViewer& graph);

} // namespace backend_utils
} // namespace openvino_ep
} // namespace onnxruntime
Original file line number Diff line number Diff line change
Expand Up @@ -125,10 +125,12 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
std::function<void(OVInferRequestPtr)> initializer = [](OVInferRequestPtr) {};
auto metadata = shared_context_.shared_weights.metadata;
if (session_context_.so_share_ep_contexts) {
// When shared ep contexts is set external weight references are transformed to model inputs. This
// creates an initializer to populate/bind input weight tensors to each inference request
initializer = [&metadata](OVInferRequestPtr ir_ptr) {
const auto input_count = ir_ptr->GetNumInputs();
for (auto i = 0u; i < input_count; i++) {
using Key = SharedContext::SharedWeights::Metadata::Key;
using Key = Metadata::Key;
const auto tensor_key = Key{ir_ptr->GetInputTensorName(i)};
if (metadata.contains(tensor_key)) {
auto& value = metadata.at(tensor_key);
Expand All @@ -137,6 +139,8 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
}
};
}

// Create inference request queue and initialize according to passed function
inferRequestsQueue_ = std::unique_ptr<InferRequestsQueue>(new InferRequestsQueue(exe_network_, num_infer_req, std::move(initializer)));
}

Expand Down
65 changes: 28 additions & 37 deletions onnxruntime/core/providers/openvino/contexts.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,52 +18,42 @@ namespace openvino_ep {

namespace fs = std::filesystem;

// Container for the types describing weight tensors: a name-based Key, a Value holding
// where the tensor data lives plus its shape/type, and the Map that associates them.
struct Metadata {
// Lookup key; two keys are equal when their names are equal (defaulted comparison).
struct Key {
std::string name;
bool operator==(const Key&) const = default;
};
// Hash functor for Key; hashes the name only, consistent with Key's equality.
struct Hash {
std::size_t operator()(const Key& key) const noexcept {
return std::hash<std::string>()(key.name);
}
};
// Per-tensor record.
struct Value {
// Location string of the tensor data (presumably the external weights file name - confirm).
std::string location;
// Offset and length of the tensor data within that location; passed as the file offset and
// read size when weights are loaded, so presumably both are byte counts.
// NOTE(review): both are unsigned int, i.e. typically limited to 4 GiB - confirm this is intended.
unsigned int data_offset;
unsigned int size;
// Tensor shape.
std::vector<size_t> dimensions;
// Element type as a 32-bit integer code (presumably an ONNX TensorProto data type - confirm).
std::int32_t element_type;
// Tensor object for this entry; null until one has been created.
std::shared_ptr<ov::Tensor> tensor;
};
using Map = std::unordered_map<Key, Value, Hash>;
// Stream insertion/extraction for a whole Map (defined outside this struct).
friend std::ostream& operator<<(std::ostream& right, const Metadata::Map& metadata);
friend std::istream& operator>>(std::istream& right, Metadata::Map& metadata);
};

// State shared through WeakSingleton<SharedContext>: a reference to the OpenVINO core and the
// shared weights bookkeeping (external weight file name, opened weights file, tensor metadata).
class SharedContext : public WeakSingleton<SharedContext> {
// Keep the core alive for as long as the SharedContext is alive.
std::shared_ptr<OVCore> OVCore_;

public:
// Acquires the core via OVCore::Get() at construction time.
SharedContext() : OVCore_(OVCore::Get()) {}
// Groups everything related to shared external weights.
struct SharedWeights {
// Types describing weight tensors: name-based Key, Value record and the Map between them.
struct Metadata {
// Lookup key; equality is the defaulted member-wise comparison (name only).
struct Key {
std::string name;
bool operator==(const Key&) const = default;
};
// Hash functor for Key; hashes the name only.
struct Hash {
std::size_t operator()(const Key& key) const noexcept {
return std::hash<std::string>()(key.name);
}
};
// Per-tensor record: data location, offset/size within it, shape, element type code and the
// tensor object (null until created).
struct Value {
std::string location;
unsigned int data_offset;
unsigned int size;
std::vector<size_t> dimensions;
std::int32_t element_type;
std::shared_ptr<ov::Tensor> tensor;
};
using Map = std::unordered_map<Key, Value, Hash>;
// Stream insertion/extraction for a whole Map (defined outside this class).
friend std::ostream& operator<<(std::ostream& right, const Metadata::Map& metadata);
friend std::istream& operator>>(std::istream& right, Metadata::Map& metadata);
};

// Wraps an input file stream over a weights file together with its size.
struct WeightsFile {
// Presumably deletes copy and move operations, as the macro name suggests - confirm.
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(WeightsFile);
WeightsFile() = delete;
explicit WeightsFile(std::filesystem::path filename);

// Reads `size` bytes starting at `file_offset` into `data`.
void load_weights(size_t file_offset, void* data, size_t size);

private:
std::ifstream file_;
size_t weights_size_;
};

fs::path external_weight_filename;
std::unique_ptr<WeightsFile> mapped_weights;
Metadata::Map metadata;
} shared_weights;

// Note: only the metadata map is cleared; external_weight_filename and mapped_weights are left as-is.
void clear() { // Deletes the data stored in the SharedContext
shared_weights.metadata.clear();
}
};

using config_t = std::map<std::string, ov::AnyMap>;
Expand Down Expand Up @@ -102,6 +92,7 @@ struct ProviderInfo {
bool so_context_embed_mode{false}; // ORT session option
bool so_share_ep_contexts{false}; // ORT session option
fs::path so_context_file_path{}; // ORT session option
bool so_stop_share_ep_contexts{false}; // ORT session option
const ConfigOptions* config_options{NULL};
const std::unordered_set<std::string> valid_provider_keys = {"device_type", "device_id", "device_luid", "cache_dir", "precision",
"load_config", "context", "num_of_threads", "model_priority", "num_streams", "enable_opencl_throttling", "enable_qdq_optimizer",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ OpenVINOExecutionProvider::~OpenVINOExecutionProvider() {
backend_manager.ShutdownBackendManager();
}
backend_managers_.clear();
shared_context_.reset();
}

std::vector<std::unique_ptr<ComputeCapability>>
Expand Down Expand Up @@ -106,7 +107,12 @@ common::Status OpenVINOExecutionProvider::Compile(
auto& metadata = shared_context_->shared_weights.metadata;
if (session_context_.so_share_ep_contexts && metadata.empty()) {
// Metadata is read from the context file's directory when a context file path is set, otherwise from the model's directory; this could be a source or epctx model
fs::path metadata_filename = session_context_.onnx_model_path_name.parent_path() / "metadata.bin";
fs::path metadata_filename;
if (session_context_.so_context_file_path.empty()) {
metadata_filename = session_context_.onnx_model_path_name.parent_path() / "metadata.bin";
} else {
metadata_filename = session_context_.so_context_file_path.parent_path() / "metadata.bin";
}
std::ifstream file(metadata_filename, std::ios::binary);
if (file) {
file >> metadata;
Expand Down Expand Up @@ -191,6 +197,10 @@ common::Status OpenVINOExecutionProvider::Compile(
}
}

if (session_context_.so_stop_share_ep_contexts) {
shared_context_->clear();
}

return status;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ void ParseConfigOptions(ProviderInfo& pi) {
pi.so_context_embed_mode = pi.config_options->GetConfigOrDefault(kOrtSessionOptionEpContextEmbedMode, "0") == "1";
pi.so_share_ep_contexts = pi.config_options->GetConfigOrDefault(kOrtSessionOptionShareEpContexts, "0") == "1";
pi.so_context_file_path = pi.config_options->GetConfigOrDefault(kOrtSessionOptionEpContextFilePath, "");
pi.so_stop_share_ep_contexts = pi.config_options->GetConfigOrDefault(kOrtSessionOptionStopShareEpContexts, "0") == "1";

if (pi.so_share_ep_contexts) {
ov::AnyMap map;
Expand Down
Loading
Loading