Skip to content

Commit 6761ffb

Browse files
committed
Qualcomm AI Engine Direct - multi-method support
Summary - refactor to adopt multi-method change - framework change to meet the use case
1 parent 4e38f4a commit 6761ffb

File tree

20 files changed

+231
-832
lines changed

20 files changed

+231
-832
lines changed

backends/qualcomm/CMakeLists.txt

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -153,12 +153,12 @@ target_link_libraries(qnn_executorch_logging PRIVATE qnn_schema)
153153
target_link_libraries(qnn_profiler PRIVATE qnn_executorch_logging)
154154
target_link_libraries(qnn_logger PRIVATE qnn_implementation ${android_log})
155155
target_link_libraries(qnn_backend PRIVATE qnn_implementation qnn_logger)
156-
target_link_libraries(qnn_custom_protocol PRIVATE qcir_utils)
156+
target_link_libraries(qnn_custom_protocol PRIVATE qnn_logger)
157157
target_link_libraries(
158158
qnn_device PRIVATE qnn_executorch_logging qnn_implementation qnn_logger
159159
)
160160
target_link_libraries(
161-
qnn_backend_cache PRIVATE qnn_sys_implementation qcir_utils
161+
qnn_backend_cache PRIVATE qnn_sys_implementation
162162
)
163163
target_link_libraries(
164164
qnn_context PRIVATE qnn_implementation qnn_logger qnn_backend qnn_device
@@ -184,7 +184,7 @@ target_link_libraries(
184184
)
185185
target_link_libraries(
186186
qnn_executorch_backend PRIVATE qnn_executorch_header qnn_schema qnn_manager
187-
executorch_core qcir_utils extension_tensor
187+
executorch_core extension_tensor
188188
)
189189
set_target_properties(
190190
qnn_executorch_backend PROPERTIES LINK_FLAGS "-Wl,-rpath='$ORIGIN'"
@@ -243,7 +243,6 @@ if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86_64")
243243
qnn_manager
244244
qnn_executorch_header
245245
executorch
246-
qcir_utils
247246
extension_tensor
248247
)
249248
target_link_libraries(

backends/qualcomm/aot/python/PyQnnManagerAdaptor.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,15 +30,14 @@ PYBIND11_MODULE(PyQnnManagerAdaptor, m) {
3030
py::class_<PyQnnManager, std::shared_ptr<PyQnnManager>>(m, "QnnManager")
3131
.def(py::init<const py::bytes&>())
3232
.def(py::init<const py::bytes&, const py::bytes&>())
33-
.def(py::init<const py::bytes&, const py::list&>())
3433
.def("Init", &PyQnnManager::Init)
3534
.def("IsNodeSupportedByBackend", &PyQnnManager::IsNodeSupportedByBackend)
36-
.def("Compile", py::overload_cast<>(&PyQnnManager::Compile))
3735
.def(
3836
"Compile",
3937
py::overload_cast<
40-
const std::string&,
41-
std::vector<std::shared_ptr<OpWrapper>>&>(&PyQnnManager::Compile))
38+
const std::vector<std::string>&,
39+
std::vector<std::vector<std::shared_ptr<OpWrapper>>>&>(
40+
&PyQnnManager::Compile))
4241
.def("Destroy", &PyQnnManager::Destroy)
4342
.def("IsAvailable", &PyQnnManager::IsAvailable)
4443
.def("IsTensorDump", &PyQnnManager::IsTensorDump)

backends/qualcomm/aot/python/PyQnnManagerAdaptor.h

Lines changed: 11 additions & 247 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88
#pragma once
9-
#include <executorch/backends/qualcomm/aot/ir/qcir_utils.h>
109
#include <executorch/backends/qualcomm/aot/python/PyQnnWrapperAdaptor.h>
1110
#include <executorch/backends/qualcomm/qc_compiler_spec_generated.h>
1211
#include <executorch/backends/qualcomm/runtime/Logging.h>
@@ -50,119 +49,6 @@ class PyQnnManager {
5049
qnn_executorch_options, qnn_executorch_context_binary_);
5150
}
5251

53-
// used during stage 2 of multi-graph mode
54-
explicit PyQnnManager(const py::bytes& buffer, const py::list& qcirs)
55-
: qnn_executorch_option_ptr_(buffer) {
56-
auto qnn_executorch_options = GetQnnExecuTorchOptions(
57-
qnn_executorch_option_ptr_.cast<std::string_view>().data());
58-
59-
// merge multiple qcirs into one context with multiple graphs
60-
61-
// We start retrieving tensor from offsets = 0.
62-
std::vector<uint32_t> offsets(1, 0);
63-
std::vector<uint8_t> tensor_data;
64-
std::vector<uint8_t*> tensor_ptr;
65-
std::vector<uint64_t> tensor_size;
66-
uint64_t total_tensor_size = 0;
67-
for (size_t i = 0; i < qcirs.size(); ++i) {
68-
py::buffer_info info(py::buffer(qcirs[i].cast<py::bytes>()).request());
69-
70-
uint8_t* qcir_custom_buffer_ptr = static_cast<uint8_t*>(info.ptr);
71-
QnnQcirCustomProtocol qnn_qcir_custom_protocol;
72-
auto [status, _, qcir_tensor_size, __, qcir_tensor_ptr] =
73-
qnn_qcir_custom_protocol.DeserializeQcirCustomBuffer(
74-
qcir_custom_buffer_ptr);
75-
76-
if (status != Error::Ok) {
77-
QNN_EXECUTORCH_LOG_ERROR("Fail to verify QnnQcirCustomProtocol");
78-
return;
79-
}
80-
81-
tensor_ptr.push_back(static_cast<uint8_t*>(qcir_tensor_ptr));
82-
tensor_size.push_back(qcir_tensor_size);
83-
total_tensor_size += qcir_tensor_size;
84-
offsets.push_back(offsets.back() + qcir_tensor_size);
85-
}
86-
87-
tensor_data.resize(total_tensor_size);
88-
89-
// store multiple graphs tensor in a contiguous memory space
90-
for (size_t i = 0; i < tensor_ptr.size(); ++i) {
91-
std::memcpy(
92-
tensor_data.data() + offsets[i], tensor_ptr[i], tensor_size[i]);
93-
}
94-
95-
std::vector<flatbuffers::Offset<qcir::Graph>> graphs;
96-
for (size_t i = 0; i < qcirs.size(); ++i) {
97-
py::buffer_info info(py::buffer(qcirs[i].cast<py::bytes>()).request());
98-
99-
uint8_t* qcir_custom_buffer_ptr = static_cast<uint8_t*>(info.ptr);
100-
QnnQcirCustomProtocol qnn_qcir_custom_protocol;
101-
auto [status, qcir_fbs_size, _, qcir_fbs_ptr, __] =
102-
qnn_qcir_custom_protocol.DeserializeQcirCustomBuffer(
103-
qcir_custom_buffer_ptr);
104-
105-
if (status != Error::Ok) {
106-
QNN_EXECUTORCH_LOG_ERROR("Fail to verify QnnQcirCustomProtocol");
107-
return;
108-
}
109-
110-
auto context = qcir::GetContext(qcir_fbs_ptr);
111-
for (const auto& graph : *context->graphs()) {
112-
std::vector<flatbuffers::Offset<qcir::Tensor>> tensors;
113-
for (const auto tensor : *graph->tensors()) {
114-
// here we need to take a detour to merge multiple qcir flatbuffers
115-
// outer ToTensor
116-
// return: flatbuffers::Offset<Tensor>
117-
// consume: QnnTensor, data_offset, flatbuffers::FlatBufferBuilder*
118-
// inner ToTensor
119-
// return: QnnTensor
120-
// consume:
121-
// flatbuffers::Vector<::flatbuffers::Offset<qcir::Tensor>>,
122-
// data_ptr
123-
tensors.emplace_back(ToTensor(
124-
ToTensor(tensor, nullptr),
125-
offsets[i] + tensor->offset(),
126-
&builder_));
127-
}
128-
std::vector<flatbuffers::Offset<qcir::Operator>> nodes;
129-
for (const auto& node : *graph->nodes()) {
130-
uint32_t* inputs_ptr = const_cast<uint32_t*>(node->inputs()->data());
131-
uint32_t* outputs_ptr =
132-
const_cast<uint32_t*>(node->outputs()->data());
133-
uint32_t* params_ptr = const_cast<uint32_t*>(node->params()->data());
134-
std::vector<uint32_t> inputs(
135-
inputs_ptr, inputs_ptr + node->inputs()->size());
136-
std::vector<uint32_t> outputs(
137-
outputs_ptr, outputs_ptr + node->outputs()->size());
138-
std::vector<uint32_t> params(
139-
params_ptr, params_ptr + node->params()->size());
140-
nodes.emplace_back(qcir::CreateOperatorDirect(
141-
builder_,
142-
node->name()->str().c_str(),
143-
node->package_name()->str().c_str(),
144-
node->type_name()->str().c_str(),
145-
&inputs,
146-
&outputs,
147-
&params));
148-
}
149-
graphs.emplace_back(qcir::CreateGraphDirect(
150-
builder_, graph->name()->str().c_str(), &nodes, &tensors));
151-
}
152-
}
153-
154-
auto context = qcir::CreateContextDirect(builder_, &graphs);
155-
builder_.Finish(context);
156-
QnnExecuTorchContextBinary qcir_bin(
157-
{builder_.GetBufferPointer(), builder_.GetSize()});
158-
159-
// Init QnnQcirCustomProtocol binary
160-
qnn_executorch_context_binary_ =
161-
MakeQcirCustomBinaryInfo(qcir_bin, tensor_data);
162-
qnn_manager_ = std::make_shared<QnnManager>(
163-
qnn_executorch_options, qnn_executorch_context_binary_);
164-
}
165-
16652
executorch::runtime::Error Init() {
16753
return qnn_manager_->Init();
16854
}
@@ -172,146 +58,24 @@ class PyQnnManager {
17258
return qnn_manager_->IsNodeSupportedByBackend(op_wrappers);
17359
}
17460

175-
// this method is specific for stage 2 of compiling multi-graphs
176-
py::array_t<char> Compile() {
177-
if (qnn_manager_->CompileQcir() != Error::Ok) {
178-
QNN_EXECUTORCH_LOG_ERROR("Fail to compile qcir");
179-
return py::array_t<char>(0);
180-
}
181-
182-
// generate context binary if compilation succeded
183-
QnnExecuTorchContextBinary binary_info;
184-
qnn_manager_->GetContextBinary(binary_info);
185-
// allocate py::array (to pass the result of the C++ function to Python)
186-
auto result = py::array_t<char>(binary_info.nbytes);
187-
auto result_buffer = result.request();
188-
char* result_ptr = (char*)result_buffer.ptr;
189-
std::memcpy(result_ptr, binary_info.buffer, binary_info.nbytes);
190-
return result;
191-
}
192-
19361
py::array_t<char> Compile(
194-
const std::string& graph_name,
195-
std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
62+
const std::vector<std::string>& graph_names,
63+
std::vector<std::vector<std::shared_ptr<OpWrapper>>>& op_wrappers) {
19664
QnnExecuTorchContextBinary binary_info;
19765

198-
if (qnn_manager_->IsMultipleGraphs()) {
199-
builder_.Reset();
200-
std::vector<uint8_t> tensor_data;
201-
std::vector<uint64_t> offsets;
202-
std::unordered_map<void*, int> tensor_map;
203-
std::vector<flatbuffers::Offset<qcir::Tensor>> fb_tensors;
204-
std::vector<flatbuffers::Offset<qcir::Operator>> fb_ops;
205-
206-
auto set_tensor = [&](const std::shared_ptr<TensorWrapper>& wrapper,
207-
std::vector<uint32_t>& index) {
208-
auto it = tensor_map.find(wrapper.get());
209-
if (it != tensor_map.end()) {
210-
index.push_back(it->second);
211-
} else {
212-
tensor_map[wrapper.get()] = fb_tensors.size();
213-
index.push_back(fb_tensors.size());
214-
offsets.push_back(tensor_data.size());
215-
Qnn_Tensor_t qnn_tensor = wrapper->CloneTensorStruct();
216-
fb_tensors.emplace_back(
217-
ToTensor(qnn_tensor, offsets.back(), &builder_));
218-
uint8_t* data_ptr = static_cast<uint8_t*>(
219-
QNN_TENSOR_VER_PTR(qnn_tensor)->clientBuf.data);
220-
if (data_ptr != nullptr) {
221-
tensor_data.insert(
222-
tensor_data.end(),
223-
data_ptr,
224-
data_ptr + QNN_TENSOR_VER_PTR(qnn_tensor)->clientBuf.dataSize);
225-
}
226-
}
227-
};
228-
229-
for (std::shared_ptr<OpWrapper>& op_wrapper : op_wrappers) {
230-
std::vector<uint32_t> inputs, outputs, params;
231-
232-
for (const auto& tensor_wrapper : op_wrapper->GetInputTensors()) {
233-
set_tensor(tensor_wrapper, inputs);
234-
}
235-
236-
for (const auto& tensor_wrapper : op_wrapper->GetOutputTensors()) {
237-
set_tensor(tensor_wrapper, outputs);
238-
}
239-
240-
for (const auto& param : op_wrapper->GetParams()) {
241-
auto* p_tensor_param = dynamic_cast<TensorParamWrapper*>(param.get());
242-
if (p_tensor_param != nullptr) {
243-
auto wrapper = p_tensor_param->GetTensorWrapper();
244-
wrapper->SetName(param->GetName());
245-
set_tensor(wrapper, params);
246-
} else {
247-
executorch::runtime::Error err = param->PopulateQnnParam();
248-
if (err != executorch::runtime::Error::Ok) {
249-
QNN_EXECUTORCH_LOG_ERROR(
250-
"Fail to get scalar parameter in online prepare stage");
251-
return py::array_t<char>(0);
252-
}
253-
Qnn_Param_t p = param->GetQnnParam();
254-
Qnn_Tensor_t t(
255-
{.version = QNN_TENSOR_VERSION_2, .v2 = QNN_TENSOR_V2_INIT});
256-
QNN_TENSOR_VER_PTR(t)->name = p.name;
257-
QNN_TENSOR_VER_PTR(t)->dataType = p.scalarParam.dataType;
258-
QNN_TENSOR_VER_PTR(t)->clientBuf.data =
259-
static_cast<void*>(&p.scalarParam.uint8Value);
260-
QNN_TENSOR_VER_PTR(t)->clientBuf.dataSize =
261-
GetDataTypeSize(QNN_TENSOR_VER_PTR(t)->dataType);
262-
263-
// collect tensor data
264-
offsets.push_back(tensor_data.size());
265-
const uint8_t* data_ptr =
266-
static_cast<uint8_t*>(QNN_TENSOR_VER_PTR(t)->clientBuf.data);
267-
tensor_data.insert(
268-
tensor_data.end(),
269-
data_ptr,
270-
data_ptr + QNN_TENSOR_VER_PTR(t)->clientBuf.dataSize);
271-
params.push_back(fb_tensors.size());
272-
fb_tensors.emplace_back(ToTensor(t, offsets.back(), &builder_));
273-
}
274-
}
275-
276-
Qnn_OpConfig_t op_config = op_wrapper->GetOpConfig();
277-
fb_ops.emplace_back(qcir::CreateOperatorDirect(
278-
builder_,
279-
QNN_OP_VER_PTR(op_config)->name,
280-
QNN_OP_VER_PTR(op_config)->packageName,
281-
QNN_OP_VER_PTR(op_config)->typeName,
282-
&inputs,
283-
&outputs,
284-
&params));
285-
}
286-
287-
std::vector<flatbuffers::Offset<qcir::Graph>> fb_graphs(
288-
{qcir::CreateGraphDirect(
289-
builder_, graph_name.c_str(), &fb_ops, &fb_tensors)});
290-
auto context = qcir::CreateContextDirect(builder_, &fb_graphs);
291-
builder_.Finish(context);
292-
293-
QnnExecuTorchContextBinary qcir_binary(
294-
{builder_.GetBufferPointer(), builder_.GetSize()});
295-
296-
custom_qcir_protocol_buffer_ =
297-
QnnQcirCustomProtocol(qcir_binary.nbytes, tensor_data.size());
298-
custom_qcir_protocol_buffer_.BuildQcirCustomBuffer(
299-
qcir_binary, tensor_data);
300-
std::tie(binary_info.buffer, binary_info.nbytes) =
301-
custom_qcir_protocol_buffer_.GetCustomProtocolBuffer();
302-
} else {
303-
if (qnn_manager_->Compile(graph_name, op_wrappers) !=
66+
for (int i = 0; i < graph_names.size(); ++i) {
67+
if (qnn_manager_->Compile(graph_names[i], op_wrappers[i]) !=
30468
executorch::runtime::Error::Ok) {
30569
QNN_EXECUTORCH_LOG_ERROR("Fail to compile QNN graph");
30670
return py::array_t<char>(0);
30771
}
308-
auto qnn_executorch_options = GetQnnExecuTorchOptions(
309-
qnn_executorch_option_ptr_.cast<std::string_view>().data());
310-
if (qnn_executorch_options->saver() ||
311-
qnn_manager_->GetContextBinary(binary_info) !=
312-
executorch::runtime::Error::Ok) {
313-
return py::array_t<char>(0);
314-
}
72+
}
73+
auto qnn_executorch_options = GetQnnExecuTorchOptions(
74+
qnn_executorch_option_ptr_.cast<std::string_view>().data());
75+
if (qnn_executorch_options->saver() ||
76+
qnn_manager_->GetContextBinary(binary_info) !=
77+
executorch::runtime::Error::Ok) {
78+
return py::array_t<char>(0);
31579
}
31680

31781
// allocate py::array (to pass the result of the C++ function to Python)

0 commit comments

Comments
 (0)