Skip to content

Refactor XNN workspace sharing to allow runtime gating #11748

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 18 additions & 18 deletions backends/xnnpack/runtime/XNNCompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1866,24 +1866,24 @@ ET_NODISCARD Error XNNCompiler::compileModel(
xnn_weights_cache_t weights_cache_ptr = nullptr;
#endif

#ifdef ENABLE_XNNPACK_SHARED_WORKSPACE
ET_CHECK_OR_RETURN_ERROR(
workspace != nullptr, Internal, "Failed to initialize XNNPACK workspace");
status = xnn_create_runtime_v4(
subgraph.get(),
weights_cache_ptr,
workspace,
::executorch::extension::threadpool::get_pthreadpool(),
runtime_flags,
&runtime_ptr);
#else
status = xnn_create_runtime_v3(
subgraph.get(),
weights_cache_ptr,
::executorch::extension::threadpool::get_pthreadpool(),
runtime_flags,
&runtime_ptr);
#endif
// NOLINTBEGIN(facebook-hte-NullableDereference) - weights cache is allowed to be null
if (workspace != nullptr) {
status = xnn_create_runtime_v4(
subgraph.get(),
weights_cache_ptr,
workspace,
::executorch::extension::threadpool::get_pthreadpool(),
runtime_flags,
&runtime_ptr);
} else {
status = xnn_create_runtime_v3(
subgraph.get(),
weights_cache_ptr,
::executorch::extension::threadpool::get_pthreadpool(),
runtime_flags,
&runtime_ptr);
}
// NOLINTEND(facebook-hte-NullableDereference)

ET_CHECK_OR_RETURN_ERROR(
xnn_status_success == status,
Expand Down
79 changes: 50 additions & 29 deletions backends/xnnpack/runtime/XNNPACKBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
*/

#include <executorch/backends/xnnpack/runtime/XNNCompiler.h>
#include <executorch/backends/xnnpack/runtime/XNNPACKBackend.h>
#include <executorch/backends/xnnpack/runtime/XNNWeightsCache.h>
#include <executorch/runtime/backend/interface.h>
#include <executorch/runtime/core/error.h>
Expand Down Expand Up @@ -51,21 +52,9 @@ class XnnpackBackend final
}

#ifdef ENABLE_XNNPACK_SHARED_WORKSPACE
// Create a workspace for the XNNExecutor to use. This workspace will be
// shared across all delegate instances.
ET_LOG(Debug, "Creating XNN workspace");
xnn_workspace_t workspace = nullptr;
status = xnn_create_workspace(&workspace);
if (status != xnn_status_success) {
ET_LOG(
Error,
"Failed to create XNN workspace, XNNPACK status: 0x%x",
(unsigned int)status);
workspace = nullptr;
return;
}
workspace_.reset(workspace);
ET_LOG(Debug, "Created XNN workspace: %p", workspace_.get());
enable_shared_workspace_ = true;
#else
enable_shared_workspace_ = false;
#endif // ENABLE_XNNPACK_SHARED_WORKSPACE
}

Expand All @@ -86,9 +75,29 @@ class XnnpackBackend final
const NamedDataMap* named_data_map = context.get_named_data_map();
// thread safe. This can happen when multiple threads call init() on
// the same backend instance.
#ifdef ENABLE_XNNPACK_SHARED_WORKSPACE
const std::lock_guard<std::mutex> lock(workspace_mutex_);
#endif

std::unique_lock<std::mutex> lock(workspace_mutex_, std::defer_lock);
if (enable_shared_workspace_) {
lock.lock();
if (!workspace_) {
// Create a workspace for the XNNExecutor to use. This workspace will be
// shared across all delegate instances.
ET_LOG(Debug, "Creating XNN workspace");
xnn_workspace_t workspace = nullptr;
auto status = xnn_create_workspace(&workspace);
if (status != xnn_status_success) {
ET_LOG(
Error,
"Failed to create XNN workspace, XNNPACK status: 0x%x",
(unsigned int)status);
workspace = nullptr;
return Error::Internal;
}
// NOLINTNEXTLINE(facebook-hte-NullableDereference) - false positive
workspace_.reset(workspace);
ET_LOG(Debug, "Created XNN workspace: %p", workspace_.get());
}
}

#ifdef ENABLE_XNNPACK_WEIGHTS_CACHE
const std::lock_guard<std::mutex> lock_weight_cache(weights_cache_mutex_);
Expand Down Expand Up @@ -129,9 +138,10 @@ class XnnpackBackend final
EValue** args) const override {
auto executor = static_cast<xnnpack::delegate::XNNExecutor*>(handle);

#ifdef ENABLE_XNNPACK_SHARED_WORKSPACE
const std::lock_guard<std::mutex> lock(workspace_mutex_);
#endif
std::unique_lock<std::mutex> lock(workspace_mutex_, std::defer_lock);
if (enable_shared_workspace_) {
lock.lock();
}

#ifdef ENABLE_XNNPACK_WEIGHTS_CACHE
const std::lock_guard<std::mutex> lock_weights_cache(weights_cache_mutex_);
Expand Down Expand Up @@ -160,9 +170,10 @@ class XnnpackBackend final
// This is needed to serialize access to xnn_delete_runtime which is not
// thread safe. This can happen when multiple threads call destroy() on
// the same backend instance.
#ifdef ENABLE_XNNPACK_SHARED_WORKSPACE
const std::lock_guard<std::mutex> lock(workspace_mutex_);
#endif
std::unique_lock<std::mutex> lock(workspace_mutex_, std::defer_lock);
if (enable_shared_workspace_) {
lock.lock();
}

auto executor = static_cast<xnnpack::delegate::XNNExecutor*>(handle);

Expand All @@ -181,12 +192,16 @@ class XnnpackBackend final
}
}

// Turns workspace sharing on or off for this backend instance by overwriting
// enable_shared_workspace_. init(), execute() and destroy() consult that flag
// to decide whether to take workspace_mutex_.
// NOTE(review): the flag is a plain bool and is written here without holding
// workspace_mutex_, while the other methods read it before locking.
// Presumably this must be called before any concurrent use of the backend;
// confirm, or consider making the flag atomic.
void set_workspace_sharing_enabled(bool enable) {
this->enable_shared_workspace_ = enable;
}

private:
bool enable_shared_workspace_;
// This is a global workspace for all delegate instances.
mutable std::mutex workspace_mutex_;
std::unique_ptr<xnn_workspace, decltype(&xnn_release_workspace)> workspace_{
nullptr,
&xnn_release_workspace};
mutable std::unique_ptr<xnn_workspace, decltype(&xnn_release_workspace)>
workspace_{nullptr, &xnn_release_workspace};

// Weights cache is global to all delegate instances.
mutable std::mutex weights_cache_mutex_;
Expand All @@ -199,10 +214,16 @@ class XnnpackBackend final
};

namespace {
auto cls = XnnpackBackend();
Backend backend{"XnnpackBackend", &cls};
auto backend_instance = XnnpackBackend();
Backend backend{"XnnpackBackend", &backend_instance};
static auto success_with_compiler = register_backend(backend);
} // namespace

namespace xnnpack {
// Free-function entry point declared in XNNPACKBackend.h. Forwards the
// requested setting to the file-local backend_instance, which is the object
// registered under the name "XnnpackBackend".
void set_workspace_sharing_enabled(bool enable) {
backend_instance.set_workspace_sharing_enabled(enable);
}
} // namespace xnnpack

} // namespace backends
} // namespace executorch
7 changes: 7 additions & 0 deletions backends/xnnpack/runtime/XNNPACKBackend.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#pragma once

#include <executorch/runtime/platform/compiler.h>

namespace executorch::backends::xnnpack {
// Enables (true) or disables (false) XNNPACK workspace sharing at runtime.
// Marked ET_EXPERIMENTAL.
// NOTE(review): presumably meant to be called before any XNNPACK-delegated
// model is loaded or executed; confirm against the backend implementation.
ET_EXPERIMENTAL void set_workspace_sharing_enabled(bool enable);
}
3 changes: 3 additions & 0 deletions backends/xnnpack/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ def define_common_targets():
exported_deps = [
"//executorch/runtime/backend:interface" + aten_suffix,
],
exported_headers = [
"runtime/XNNPACKBackend.h",
],
deps = [
third_party_dep("XNNPACK"),
"//executorch/backends/xnnpack/serialization:xnnpack_flatbuffer_header",
Expand Down
Loading