From 87e463e2b8da37e700ae60b55c3191bc30331ecd Mon Sep 17 00:00:00 2001 From: Dmitry Matveev Date: Mon, 14 Oct 2024 16:23:22 +0100 Subject: [PATCH] NPUW: Fix - don't register an extra parameter to a group (#27032) ### Details: - There's a case where there's a Param->Convert path that stays in model HEAD, and then it acts as an input to all other partitions. In this case, this convert is mistakenly seen as an "extra" input which needs to be registered. ### Tickets: - E-138529 --- .../plugin/npuw/partitioning/partitioning.cpp | 18 ++++++++++++++---- src/plugins/intel_npu/src/plugin/npuw/util.hpp | 8 +++++++- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp index 58a8219f497fed..5e3f12fedf68a6 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/partitioning.cpp @@ -345,6 +345,10 @@ void Partitioner::identifySubgraphs() { } LOG_INFO("Caching done: " << node_id_cache.size() << " layers."); + // Accumulate knowledge about known OV layers when walking + // over a topologically-sorted list. + std::unordered_set nodes_known_now; + // FIXME: Need to do some sanity checks here. What if partitioning // has been generated for another variation of this model? // What if that was a completely different model? 
@@ -458,16 +462,19 @@ void Partitioner::identifySubgraphs() { continue; } else if ((ov::is_type(input_node) || ov::is_type(input_node)) && + !nodes_known_now.count(input_node) && ov::op::util::is_parameter(input_node->input(0).get_source_output().get_node_shared_ptr())) { // So the situation is: - // - a group has an input layer + // - a group has an input layer // - which reads from a Slice or Convert // - which reads from a Parameter + // - not a part of any prior group // This happens when an offline plan is used with a kvcache // model extended with slices to maintain zero-copy (LLM case) auto extra_param = input_node->input(0).get_source_output().get_node_shared_ptr(); input_mapping[input_node] = extra_param; extra_params.insert(extra_param); + LOG_DEBUG("Registered extra param " << extra_param); } else { // Ok, this input is connected to some other node's output // Replace this connection with a link to a newly created Parameter @@ -671,7 +678,8 @@ void Partitioner::identifySubgraphs() { } } this_group_idx++; // FIXME: indexed() is better! 
- } // for (partitions) + nodes_known_now.insert(group_nodes.begin(), group_nodes.end()); + } // for (partitions) // Return what we've got here std::vector& result = P.subgraphs; @@ -1387,14 +1395,16 @@ void Partitioner::matchParameters(const std::string& func_name) { this_model_nodes.insert(node_ptr.get()); } for (auto&& node : call->get_ordered_ops()) { + using ov::npuw::util::at::_; + if (ov::op::util::is_parameter(node)) { PKey pkey; for (auto&& iport : node->output(0).get_target_inputs()) { if (this_model_nodes.count(iport.get_node()) > 0) { LOG_DEBUG("Register link " << iport.get_node()->get_friendly_name() << " : " << iport.get_index()); - pkey.insert( - PReader{layer_to_prototype.at(iport.get_node()->get_friendly_name()), iport.get_index()}); + pkey.insert(PReader{_(layer_to_prototype).at(iport.get_node()->get_friendly_name()), + iport.get_index()}); } } LOG_DEBUG("Find orig parameter for " << node); diff --git a/src/plugins/intel_npu/src/plugin/npuw/util.hpp b/src/plugins/intel_npu/src/plugin/npuw/util.hpp index 1704314aee75ea..02d2c8c097811e 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/util.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/util.hpp @@ -65,8 +65,9 @@ ov::Tensor permute(const ov::Tensor& t, const std::vector& axes); ov::Tensor concat(const std::vector& tt, std::size_t axis); namespace at { -template +template struct Impl { + using M = typename std::decay::type; using V = typename M::mapped_type; M* m = nullptr; @@ -96,6 +97,11 @@ Impl _(M* pM) { return Impl(pM); } +template +Impl _(M&& m) { + return Impl(&m); +} + template Impl _(std::shared_ptr pM) { return Impl(pM.get());