Skip to content

[ExecuTorch][Weight Sharing][XNNPACK] load named data map data for xnnpack #9152

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Mar 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 46 additions & 10 deletions backends/xnnpack/runtime/XNNCompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#include <executorch/backends/xnnpack/runtime/XNNHeader.h>
#include <executorch/backends/xnnpack/serialization/schema_generated.h>
#include <executorch/extension/threadpool/threadpool.h>
#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
#include <executorch/runtime/executor/pte_data_map.h>
#include <unordered_map>

#pragma clang diagnostic ignored "-Wmissing-prototypes"
Expand All @@ -22,7 +22,9 @@ namespace xnnpack {
namespace delegate {

using executorch::runtime::Error;
using executorch::runtime::FreeableBuffer;
using executorch::runtime::MemoryAllocator;
using executorch::runtime::NamedDataMap;
using executorch::runtime::Result;

/*
Expand All @@ -48,6 +50,7 @@ class CompileAllocator {
using ValuePtr = const fb_xnnpack::XValue*;
using NodePtr = const fb_xnnpack::XNode*;
using GraphPtr = const fb_xnnpack::XNNGraph*;
using ConstantDataOffsetPtr = const fb_xnnpack::ConstantDataOffset*;
using DataType = fb_xnnpack::XNNDatatype;

// Type for define node function. This is the function signature
Expand Down Expand Up @@ -162,7 +165,9 @@ data associated with the tensor value, then returns nullptr.
const uint8_t* getConstantDataPtr(
const fb_xnnpack::XNNTensorValue* tensor_value,
GraphPtr flatbuffer_graph,
const uint8_t* constant_data_ptr) {
const uint8_t* constant_data_ptr,
const NamedDataMap* named_data_map,
std::vector<FreeableBuffer>& loaded_buffers_from_map) {
auto buffer_idx = tensor_value->constant_buffer_idx();
if (buffer_idx) {
if (!constant_data_ptr) {
Expand All @@ -171,10 +176,31 @@ const uint8_t* getConstantDataPtr(
const auto& constant_buffer = *flatbuffer_graph->constant_buffer();
return constant_buffer[buffer_idx]->storage()->data();
} else {
const auto& constant_data_offsets = *flatbuffer_graph->constant_data();
uint64_t constant_data_offset =
constant_data_offsets[buffer_idx]->offset();
return constant_data_ptr + constant_data_offset;
ConstantDataOffsetPtr constant_data_offset =
flatbuffer_graph->constant_data()->Get(buffer_idx);
uint64_t offset = constant_data_offset->offset();

bool has_named_key = flatbuffers::IsFieldPresent(
constant_data_offset, fb_xnnpack::ConstantDataOffset::VT_NAMED_KEY);
// If there is no tensor name
if (!has_named_key) {
return constant_data_ptr + offset;
} else {
const std::string& data_name = constant_data_offset->named_key()->str();
Result<FreeableBuffer> buffer =
named_data_map->get_data(data_name.c_str());
if (!buffer.ok()) {
ET_LOG(
Error,
"Failed to get constant data for key %s",
data_name.c_str());
return nullptr;
}
const uint8_t* data_ptr =
static_cast<const uint8_t*>(buffer.get().data());
loaded_buffers_from_map.push_back(std::move(buffer.get()));
return data_ptr;
}
}
}

Expand All @@ -194,7 +220,9 @@ Error defineTensor(
const uint8_t* constant_data_ptr,
std::vector<uint32_t>& input_ids,
std::vector<uint32_t>& output_ids,
CompileAllocator& allocator) {
CompileAllocator& allocator,
const NamedDataMap* named_data_map,
std::vector<FreeableBuffer>& loaded_buffers_from_map) {
const fb_xnnpack::XNNTensorValue* tensor_value = nullptr;
const fb_xnnpack::XNNQuantizedTensorValue* qtensor_value = nullptr;

Expand Down Expand Up @@ -231,8 +259,12 @@ Error defineTensor(

// Get Pointer to constant data from flatbuffer, if its non-constant
// it is a nullptr
const uint8_t* buffer_ptr =
getConstantDataPtr(tensor_value, flatbuffer_graph, constant_data_ptr);
const uint8_t* buffer_ptr = getConstantDataPtr(
tensor_value,
flatbuffer_graph,
constant_data_ptr,
named_data_map,
loaded_buffers_from_map);

xnn_status status;
// The type we might have to convert to
Expand Down Expand Up @@ -1968,6 +2000,7 @@ ET_NODISCARD Error XNNCompiler::compileModel(
size_t num_bytes,
XNNExecutor* executor,
MemoryAllocator* runtime_allocator,
const NamedDataMap* named_data_map,
xnn_workspace_t workspace) {
Result<XNNHeader> header = XNNHeader::Parse(buffer_pointer, num_bytes);
const uint8_t* flatbuffer_data = nullptr;
Expand Down Expand Up @@ -2036,6 +2069,7 @@ ET_NODISCARD Error XNNCompiler::compileModel(
std::vector<uint32_t> input_ids;
std::vector<uint32_t> output_ids;
Error err = Error::Ok;
std::vector<FreeableBuffer> loaded_buffers_from_map;
for (auto value : *flatbuffer_graph->xvalues()) {
err = defineTensor(
subgraph.get(),
Expand All @@ -2045,7 +2079,9 @@ ET_NODISCARD Error XNNCompiler::compileModel(
constant_data,
input_ids,
output_ids,
compile_allocator);
compile_allocator,
named_data_map,
loaded_buffers_from_map);

if (err != Error::Ok) {
return err;
Expand Down
1 change: 1 addition & 0 deletions backends/xnnpack/runtime/XNNCompiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class XNNCompiler {
size_t num_bytes,
XNNExecutor* executor,
executorch::runtime::MemoryAllocator* runtime_allocator,
const executorch::runtime::NamedDataMap* named_data_map,
xnn_workspace_t workspace);
};

Expand Down
7 changes: 5 additions & 2 deletions backends/xnnpack/runtime/XNNPACKBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#include <executorch/runtime/backend/interface.h>
#include <executorch/runtime/core/error.h>
#include <executorch/runtime/core/evalue.h>
#include <executorch/runtime/platform/profiler.h>
#include <executorch/runtime/executor/pte_data_map.h>

#include <memory>
#include <mutex>
Expand All @@ -29,6 +29,7 @@ using executorch::runtime::DelegateHandle;
using executorch::runtime::Error;
using executorch::runtime::EValue;
using executorch::runtime::FreeableBuffer;
using executorch::runtime::NamedDataMap;
using executorch::runtime::Result;

class XnnpackBackend final : public ::executorch::runtime::BackendInterface {
Expand Down Expand Up @@ -79,13 +80,14 @@ class XnnpackBackend final : public ::executorch::runtime::BackendInterface {
return Error::MemoryAllocationFailed;
}

const NamedDataMap* named_data_map = context.get_named_data_map();

#ifdef ENABLE_XNNPACK_SHARED_WORKSPACE
// This is needed to serialize access to xnn_create_runtime which is not
// thread safe. This can happen when multiple threads call init() on
// the same backend instance.
const std::lock_guard<std::mutex> lock(workspace_mutex_);
#endif

// Executor has been allocated but not constructed, ensure that runtime_ is
// nullptr by constructing it in place here. NOTE: Since we use placement
// new and since this type is not trivially destructible, we must call the
Expand All @@ -96,6 +98,7 @@ class XnnpackBackend final : public ::executorch::runtime::BackendInterface {
processed->size(),
executor,
context.get_runtime_allocator(),
named_data_map,
workspace_.get());
// This backend does not need its processed data after compiling the model.
processed->Free();
Expand Down
9 changes: 9 additions & 0 deletions backends/xnnpack/serialization/runtime_schema.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -320,11 +320,20 @@ table XNNLeakyReLU {
table ConstantDataOffset {
// Offset (in bytes) of this constant's data, relative to the constant data
// base offset provided in the XNNPACKHeader.
// `named_key` and `offset` are mutually exclusive: only one of the two is
// valid at a time. If `named_key` is a non-empty string, then `offset` must
// be UINT64_MAX; if `offset` is not UINT64_MAX, then `named_key` must be an
// empty string.
offset: uint64;

// The size in bytes of valid data starting at `offset`. The constant data
// may be followed by padding before the next piece of constant data, so this
// size may be smaller than the distance to the next offset.
size: uint64;

// Unique string id used to query this constant's data from the named data
// store (NamedDataMap) instead of from the inline constant segment.
// `named_key` and `offset` are mutually exclusive: only one of the two is
// valid at a time. If `named_key` is a non-empty string, then `offset` must
// be UINT64_MAX; if `offset` is not UINT64_MAX, then `named_key` must be an
// empty string.
named_key: string;
}

table XNNGraph {
Expand Down
1 change: 1 addition & 0 deletions backends/xnnpack/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def define_common_targets():
"//executorch/backends/xnnpack/serialization:xnnpack_flatbuffer_header",
"//executorch/extension/threadpool:threadpool",
"//executorch/runtime/core/exec_aten/util:tensor_util",
"//executorch/runtime/executor:pte_data_map"
],
# XnnpackBackend.cpp needs to compile with executor as whole
# @lint-ignore BUCKLINT: Avoid `link_whole=True` (https://fburl.com/avoid-link-whole)
Expand Down
Loading