[AUTO] Filter device when compile_model with file path #27019

Status: Open. Wants to merge 24 commits into master.
Changes from 1 commit (24 commits total).
bc2f794
enable AUTO to read model if passing model path into plugin.
WeldonWangwang Oct 11, 2024
3e2166a
enable test case.
WeldonWangwang Oct 12, 2024
c435080
update test case.
WeldonWangwang Oct 12, 2024
63b49bc
Update.
WeldonWangwang Oct 12, 2024
6734760
update.
WeldonWangwang Oct 12, 2024
3a76f96
update.
WeldonWangwang Oct 12, 2024
3660bbd
update.
yangwang201911 Oct 15, 2024
e23f124
Update.
WeldonWangwang Oct 16, 2024
877360d
fix the issue of calculating the first infer time.
yangwang201911 Oct 17, 2024
6208642
fix the issue when cache enabled.
yangwang201911 Oct 18, 2024
4ba6a02
the default setting for runtime fallback is to be disabled.
yangwang201911 Oct 21, 2024
b2f8c72
1. update the test case to disable CPU model cache when user app set …
yangwang201911 Oct 22, 2024
8e9a6b3
update.
yangwang201911 Oct 23, 2024
56246ce
update.
yangwang201911 Oct 23, 2024
8010546
update.
yangwang201911 Oct 23, 2024
73da15b
update.
yangwang201911 Oct 24, 2024
a917a76
Merge branch 'master' into ywang2/fix_query_statue_not_implemented_fo…
yangwang201911 Oct 25, 2024
1d4f0aa
Update the description of runtime fallback.
WeldonWangwang Oct 28, 2024
d8c5144
Merge branch 'master' into ywang2/fix_query_statue_not_implemented_fo…
yangwang201911 Nov 4, 2024
726b3a4
Merge branch 'master' into ywang2/fix_query_statue_not_implemented_fo…
yangwang201911 Nov 11, 2024
54badff
update.
yangwang201911 Nov 12, 2024
42e655e
Merge branch 'master' into ywang2/fix_query_statue_not_implemented_fo…
yangwang201911 Nov 12, 2024
eea0fd3
Merge branch 'master' into ywang2/fix_query_statue_not_implemented_fo…
yangwang201911 Nov 18, 2024
2e97ef4
Merge branch 'master' into ywang2/fix_query_statue_not_implemented_fo…
peterchen-intel Nov 26, 2024
fix the issue when cache enabled.
yangwang201911 authored and WeldonWangwang committed Oct 18, 2024
commit 62086422284f0ddbab8c874587aa64ae98657234
2 changes: 1 addition & 1 deletion src/inference/tests/functional/caching_test.cpp
@@ -2136,7 +2136,7 @@ TEST_P(CachingTest, LoadAUTO_OneDeviceNoImportExport) {
EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(m_remoteContext ? 2 : 0);
EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _))
.Times(!m_remoteContext ? 2 : 0);
EXPECT_CALL(*mockPlugin, OnCompileModelFromFile()).Times(0);
EXPECT_CALL(*mockPlugin, OnCompileModelFromFile()).Times(m_type == TestLoadType::EModelName ? 2 : 0);
EXPECT_CALL(*mockPlugin, import_model(_, _, _)).Times(0);
EXPECT_CALL(*mockPlugin, import_model(_, _)).Times(0);
testLoad([&](ov::Core& core) {
67 changes: 53 additions & 14 deletions src/plugins/auto/src/auto_schedule.cpp
@@ -133,12 +133,7 @@ void AutoSchedule::init() {
auto customize_helper_context_from_cache_setting = [this](bool is_actual_cpu,
AutoCompileContext m_compile_context[],
ScheduleContext::Ptr& m_context) {
const auto cpu_iter = deviceChecker().check_and_return_if_device_in_list("CPU", m_context->m_device_priorities);
if (cpu_iter == m_context->m_device_priorities.end()) {
m_compile_context[CPU].m_is_enabled = false;
return;
}
m_compile_context[CPU].m_is_enabled = true;
bool is_stateful_model = false;
if (!is_actual_cpu) {
const auto& device = m_compile_context[ACTUALDEVICE].m_device_info.device_name;
auto& device_config = m_compile_context[ACTUALDEVICE].m_device_info.config;
@@ -157,13 +152,58 @@ void AutoSchedule::init() {
else
blobId = ov::ModelCache::compute_hash(m_context->m_model_path, properties);
std::string cached_model_path = ov::util::make_path(cache_dir, blobId + ".blob");
m_compile_context[CPU].m_is_enabled = !ov::util::file_exists(cached_model_path);
LOG_DEBUG_TAG("device: %s %s cached blob: %s ",
device.c_str(),
m_compile_context[CPU].m_is_enabled ? "not found" : "found",
cached_model_path.c_str());
if (!ov::util::file_exists(cached_model_path)) {
LOG_DEBUG_TAG("device: %s not found cached blob: %s ", device.c_str(), cached_model_path.c_str());
// not found blob file
if (!m_context->m_model) {
// passed model path
std::cout << "=== blob not found and will read model here ===\n";
auto m_model = m_context->m_ov_core->read_model(m_context->m_model_path, std::string{});
for (auto& op : m_model->get_ops()) {
if (std::dynamic_pointer_cast<ov::op::util::AssignBase>(op) ||
std::dynamic_pointer_cast<ov::op::util::ReadValueBase>(op)) {
is_stateful_model = true;
break;
}
}
if (is_stateful_model) {
std::cout
<< "=== stateful model. will disable CPU as accelerator and runtime fallback ===\n";
m_compile_context[CPU].m_is_enabled = false;
m_context->m_runtime_fallback = false;
m_context->m_startup_fallback = false;
}
}
} else {
// found blob file
std::cout << "=== found blob and will passing model path to acutal device ===\n";
LOG_DEBUG_TAG("device: %s found cached blob: %s ", device.c_str(), cached_model_path.c_str());
m_compile_context[CPU].m_is_enabled = false;
m_context->m_startup_fallback = false;
if (m_context->m_model) {
m_context->m_runtime_fallback = false;
} else {
auto m_model = m_context->m_ov_core->read_model(m_context->m_model_path, std::string{});
for (auto& op : m_model->get_ops()) {
if (std::dynamic_pointer_cast<ov::op::util::AssignBase>(op) ||
std::dynamic_pointer_cast<ov::op::util::ReadValueBase>(op)) {
is_stateful_model = true;
break;
}
}
if (is_stateful_model) {
std::cout << "=== stateful model. will disable runtime fallback ===\n";
m_context->m_runtime_fallback = false;
}
}
}
}
}
const auto cpu_iter = deviceChecker().check_and_return_if_device_in_list("CPU", m_context->m_device_priorities);
if (cpu_iter == m_context->m_device_priorities.end()) {
m_compile_context[CPU].m_is_enabled = false;
return;
}
if (m_compile_context[CPU].m_is_enabled) {
m_compile_context[CPU].m_device_info = *cpu_iter;
m_compile_context[CPU].m_device_info.config[ov::hint::performance_mode.name()] =
@@ -184,9 +224,8 @@
// m_compile_context[ACTUALDEVICE]
if (is_actual_cpu || !m_context->m_startup_fallback) {
m_compile_context[CPU].m_is_enabled = false;
} else {
customize_helper_context_from_cache_setting(is_actual_cpu, m_compile_context, m_context);
}
customize_helper_context_from_cache_setting(is_actual_cpu, m_compile_context, m_context);
// initialize the rest members of load context
for (int i = 0; i < CONTEXTNUM; i++) {
if (m_compile_context[i].m_is_enabled) {
@@ -336,7 +375,7 @@ void AutoSchedule::try_to_compile_model(AutoCompileContext& context, const std::
if ((m_context->m_model)) {
context.m_compiled_model = m_context->m_ov_core->compile_model(model, device, device_config);
} else {
OPENVINO_THROW("OpenVino Model is empty!");
context.m_compiled_model = m_context->m_ov_core->compile_model(m_context->m_model_path, device, device_config);
}
context.m_is_load_success = true;
auto compile_end_time = std::chrono::high_resolution_clock::now();
22 changes: 20 additions & 2 deletions src/plugins/auto/src/plugin.cpp
@@ -405,10 +405,28 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model_impl(const std::string
cloned_model = model->clone();
} else {
LOG_INFO_TAG("compile model with model path");
if (work_mode_auto) {
cloned_model = get_core()->read_model(model_path, std::string{});
auto iter_plugin_cache_dir = properties.find(ov::cache_dir.name());
std::string cache_dir =
iter_plugin_cache_dir != properties.end() ? iter_plugin_cache_dir->second.as<std::string>() : "";
if (cache_dir.empty()) {
try {
cache_dir = get_core()->get_property("", ov::cache_dir);
} catch (std::exception&) {
LOG_DEBUG_TAG("Failed to get property %s from core", ov::cache_dir.name());
}
}
if (work_mode_auto && cache_dir.empty()) {
// cache disable and will read model first here
LOG_DEBUG_TAG("Try to read model via core from model path: %s", model_path.c_str());
try {
cloned_model = get_core()->read_model(model_path, std::string{});
} catch (const ov::Exception&) {
OPENVINO_THROW("Failed to read model from model path:%s", model_path.c_str());
}
support_devices = filter_device_by_model(support_devices_by_property, cloned_model, load_config);
} else {
// cache enabled and will pass model path into schedule
LOG_DEBUG_TAG("Will pass model path into auto schedule: %s", model_path.c_str());
auto_s_context->m_model_path = model_path;
}
Contributor:
@wangleis This will degrade model compile latency. Do you see any better solutions? Not sure if it is possible to get the stateful info from the cache or the model file?
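For reference, a minimal standalone sketch (not part of this PR) of the model-file check the diff above performs when AUTO only has a model path; the helper name and include paths are illustrative assumptions:

#include <memory>
#include <string>

#include "openvino/op/util/assign_base.hpp"
#include "openvino/op/util/read_value_base.hpp"
#include "openvino/runtime/core.hpp"

// Hypothetical helper: read the IR from disk and report whether it contains
// state ops (ReadValue/Assign), i.e. whether the model is stateful.
bool is_stateful_model_file(ov::Core& core, const std::string& model_path) {
    const auto model = core.read_model(model_path, std::string{});
    for (const auto& op : model->get_ops()) {
        if (std::dynamic_pointer_cast<ov::op::util::AssignBase>(op) ||
            std::dynamic_pointer_cast<ov::op::util::ReadValueBase>(op)) {
            return true;
        }
    }
    return false;
}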

Contributor Author:
@wangleis All of the OV HW plugins, including CPU, GPU and NPU, have only one compile-model API, which accepts a model object rather than a model path as its input parameter. The compile latency may therefore not change when AUTO calls read_model() before trying to compile the model on a HW plugin. However, Core already implements a virtual compile-model API that creates the model object by calling read_model(), which means a HW plugin can override this API.
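For context, a rough sketch (the model path and device name are illustrative, not from this PR) of the two application-level entry points this discussion contrasts — handing Core a file path versus reading the model first and passing the ov::Model object:

#include <memory>
#include "openvino/runtime/core.hpp"

int main() {
    ov::Core core;

    // Path-based overload: Core (or a plugin overriding the virtual
    // compile-from-path API) reads the model internally.
    auto compiled_from_path = core.compile_model("model.xml", "AUTO");

    // Object-based overload: the application (or the AUTO plugin) reads the
    // model first, so it can be inspected (e.g. for stateful ops) before
    // compiling on the selected hardware device.
    std::shared_ptr<ov::Model> model = core.read_model("model.xml");
    auto compiled_from_model = core.compile_model(model, "AUTO");
    return 0;
}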

Contributor Author:
After detailed performance checks of the model compile phase, memory utilization and latency show no obvious change when AUTO passes the loaded model (from read_model()), instead of the model path, to the HW plugin via Core. @wangleis @songbell

Contributor:
I don't think so. If core.compile_model("test.xml", GPU) with cache enabled can work, why can't AUTO benefit?
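A small sketch of the scenario referenced here — compiling from a file path with the model cache enabled, so repeated compiles can be served from the cached blob (the cache directory, model path and device are illustrative):

#include "openvino/runtime/core.hpp"

int main() {
    ov::Core core;

    // Enable the model cache; compiled blobs are stored in this directory.
    core.set_property(ov::cache_dir("model_cache"));

    // First call compiles and caches; later calls with the same model,
    // device and properties can import the cached blob instead of recompiling.
    auto compiled = core.compile_model("test.xml", "GPU");
    return 0;
}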

Contributor Author (@yangwang201911, Oct 17, 2024):
Currently, only compiling the model with cache disabled has been tested, and no degradation was observed for AUTO in that situation. Will check the performance change for compiling from a model path with cache enabled soon. @songbell @wangleis

Contributor Author:
No performance gap was observed on 12 models of different scales when compiling with cache enabled.

}