(WIP) Refactor XNN workspace sharing to allow runtime gating (#11748)

GregoryComer · facebook-github-bot · commit af3e5b7a5d6c · 2025-06-17T13:34:31.000-07:00
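Summary: Refactor the XNNPACK backend's workspace-sharing logic so that it can be gated at runtime rather than only at compile time. The ENABLE_XNNPACK_SHARED_WORKSPACE build flag now only sets the default value of the toggle, and the shared workspace is created on demand the first time it is needed while sharing is enabled. This change also adds a temporary API, marked experimental, to enable workspace sharing; it will be replaced with backend options once those are available. Pull Request resolved: #11748. Test Plan: CI. Rollback Plan: none specified. Differential Revision: D76789804. Pulled By: GregoryComer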
diff --git a/backends/xnnpack/runtime/XNNCompiler.cpp b/backends/xnnpack/runtime/XNNCompiler.cpp
@@ -1866,24 +1866,24 @@ ET_NODISCARD Error XNNCompiler::compileModel(
   xnn_weights_cache_t weights_cache_ptr = nullptr;
 #endif
 
-#ifdef ENABLE_XNNPACK_SHARED_WORKSPACE
-  ET_CHECK_OR_RETURN_ERROR(
-      workspace != nullptr, Internal, "Failed to initialize XNNPACK workspace");
-  status = xnn_create_runtime_v4(
-      subgraph.get(),
-      weights_cache_ptr,
-      workspace,
-      ::executorch::extension::threadpool::get_pthreadpool(),
-      runtime_flags,
-      &runtime_ptr);
-#else
-  status = xnn_create_runtime_v3(
-      subgraph.get(),
-      weights_cache_ptr,
-      ::executorch::extension::threadpool::get_pthreadpool(),
-      runtime_flags,
-      &runtime_ptr);
-#endif
+  // NOLINTBEGIN(facebook-hte-NullableDereference) - weights cache is allowed to be null
+  if (workspace != nullptr) {
+    status = xnn_create_runtime_v4(
+        subgraph.get(),
+        weights_cache_ptr, 
+        workspace,
+        ::executorch::extension::threadpool::get_pthreadpool(),
+        runtime_flags,
+        &runtime_ptr);
+  } else {
+    status = xnn_create_runtime_v3(
+        subgraph.get(),
+        weights_cache_ptr,
+        ::executorch::extension::threadpool::get_pthreadpool(),
+        runtime_flags,
+        &runtime_ptr);
+  }
+  // NOLINTEND(facebook-hte-NullableDereference)
 
   ET_CHECK_OR_RETURN_ERROR(
       xnn_status_success == status,
diff --git a/backends/xnnpack/runtime/XNNPACKBackend.cpp b/backends/xnnpack/runtime/XNNPACKBackend.cpp
@@ -7,6 +7,7 @@
  */
 
 #include <executorch/backends/xnnpack/runtime/XNNCompiler.h>
+#include <executorch/backends/xnnpack/runtime/XNNPACKBackend.h>
 #include <executorch/backends/xnnpack/runtime/XNNWeightsCache.h>
 #include <executorch/runtime/backend/interface.h>
 #include <executorch/runtime/core/error.h>
@@ -51,21 +52,9 @@ class XnnpackBackend final
     }
 
 #ifdef ENABLE_XNNPACK_SHARED_WORKSPACE
-    // Create a workspace for the XNNExecutor to use. This workspace will be
-    // shared across all delegate instances.
-    ET_LOG(Debug, "Creating XNN workspace");
-    xnn_workspace_t workspace = nullptr;
-    status = xnn_create_workspace(&workspace);
-    if (status != xnn_status_success) {
-      ET_LOG(
-          Error,
-          "Failed to create XNN workspace, XNNPACK status: 0x%x",
-          (unsigned int)status);
-      workspace = nullptr;
-      return;
-    }
-    workspace_.reset(workspace);
-    ET_LOG(Debug, "Created XNN workspace: %p", workspace_.get());
+    enable_shared_workspace_ = true;
+#else
+    enable_shared_workspace_ = false;
 #endif // ENABLE_XNNPACK_SHARED_WORKSPACE
   }
 
@@ -86,9 +75,29 @@ class XnnpackBackend final
     const NamedDataMap* named_data_map = context.get_named_data_map();
     // thread safe. This can heppen when multiple threads call init() on
     // the same backend instance.
-#ifdef ENABLE_XNNPACK_SHARED_WORKSPACE
-    const std::lock_guard<std::mutex> lock(workspace_mutex_);
-#endif
+
+    std::unique_lock<std::mutex> lock(workspace_mutex_, std::defer_lock);
+    if (enable_shared_workspace_) {
+      lock.lock();
+      if (!workspace_) {
+        // Create a workspace for the XNNExecutor to use. This workspace will be
+        // shared across all delegate instances.
+        ET_LOG(Debug, "Creating XNN workspace");
+        xnn_workspace_t workspace = nullptr;
+        auto status = xnn_create_workspace(&workspace);
+        if (status != xnn_status_success) {
+          ET_LOG(
+              Error,
+              "Failed to create XNN workspace, XNNPACK status: 0x%x",
+              (unsigned int)status);
+          workspace = nullptr;
+          return Error::Internal;
+        }
+        // NOLINTNEXTLINE(facebook-hte-NullableDereference) - false positive
+        workspace_.reset(workspace);
+        ET_LOG(Debug, "Created XNN workspace: %p", workspace_.get());
+      }
+    }
 
 #ifdef ENABLE_XNNPACK_WEIGHTS_CACHE
     const std::lock_guard<std::mutex> lock_weight_cache(weights_cache_mutex_);
@@ -129,9 +138,10 @@ class XnnpackBackend final
       EValue** args) const override {
     auto executor = static_cast<xnnpack::delegate::XNNExecutor*>(handle);
 
-#ifdef ENABLE_XNNPACK_SHARED_WORKSPACE
-    const std::lock_guard<std::mutex> lock(workspace_mutex_);
-#endif
+    std::unique_lock<std::mutex> lock(workspace_mutex_, std::defer_lock);
+    if (enable_shared_workspace_) {
+      lock.lock();
+    }
 
 #ifdef ENABLE_XNNPACK_WEIGHTS_CACHE
     const std::lock_guard<std::mutex> lock_weights_cache(weights_cache_mutex_);
@@ -160,9 +170,10 @@ class XnnpackBackend final
       // This is needed to serialize access to xnn_delete_runtime which is not
       // thread safe. This can heppen when multiple threads call destroy() on
       // the same backend instance.
-#ifdef ENABLE_XNNPACK_SHARED_WORKSPACE
-      const std::lock_guard<std::mutex> lock(workspace_mutex_);
-#endif
+      std::unique_lock<std::mutex> lock(workspace_mutex_, std::defer_lock);
+      if (enable_shared_workspace_) {
+        lock.lock();
+      }
 
       auto executor = static_cast<xnnpack::delegate::XNNExecutor*>(handle);
 
@@ -181,12 +192,16 @@ class XnnpackBackend final
     }
   }
 
+  void set_workspace_sharing_enabled(bool enable) { // Runtime override of the default chosen by ENABLE_XNNPACK_SHARED_WORKSPACE.
+    this->enable_shared_workspace_ = enable; // NOTE(review): written without holding workspace_mutex_, while other member functions read this flag to decide whether to lock - confirm callers only toggle it while no delegate call is in flight.
+  }
+
  private:
+  bool enable_shared_workspace_;
   // This is a global workspace for all delegate instances.
   mutable std::mutex workspace_mutex_;
-  std::unique_ptr<xnn_workspace, decltype(&xnn_release_workspace)> workspace_{
-      nullptr,
-      &xnn_release_workspace};
+  mutable std::unique_ptr<xnn_workspace, decltype(&xnn_release_workspace)>
+      workspace_{nullptr, &xnn_release_workspace};
 
   // Weights cache is global to all delegate instances.
   mutable std::mutex weights_cache_mutex_;
@@ -199,10 +214,16 @@ class XnnpackBackend final
 };
 
 namespace {
-auto cls = XnnpackBackend();
-Backend backend{"XnnpackBackend", &cls};
+auto backend_instance = XnnpackBackend();
+Backend backend{"XnnpackBackend", &backend_instance};
 static auto success_with_compiler = register_backend(backend);
 } // namespace
 
+namespace xnnpack {
+void set_workspace_sharing_enabled(bool enable) { // Experimental toggle; forwards to the file-local backend_instance that is registered as "XnnpackBackend".
+  backend_instance.set_workspace_sharing_enabled(enable);
+}
+} // namespace xnnpack
+
 } // namespace backends
 } // namespace executorch
diff --git a/backends/xnnpack/runtime/XNNPACKBackend.h b/backends/xnnpack/runtime/XNNPACKBackend.h
@@ -0,0 +1,7 @@
+#pragma once
+
+#include <executorch/runtime/platform/compiler.h>
+
+namespace executorch::backends::xnnpack {
+ET_EXPERIMENTAL void set_workspace_sharing_enabled(bool enable); // Turns XNNPACK workspace sharing on (true) or off (false) at runtime; temporary API, expected to be replaced by backend options.
+} // namespace executorch::backends::xnnpack
diff --git a/backends/xnnpack/targets.bzl b/backends/xnnpack/targets.bzl
@@ -61,6 +61,9 @@ def define_common_targets():
             exported_deps = [
                 "//executorch/runtime/backend:interface" + aten_suffix,
             ],
+            exported_headers = [
+                "runtime/XNNPACKBackend.h",
+            ],
             deps = [
                 third_party_dep("XNNPACK"),
                 "//executorch/backends/xnnpack/serialization:xnnpack_flatbuffer_header",