Commit 911094e

shewu-quic authored and facebook-github-bot committed
Qualcomm AI Engine Direct - enable per-tensor dump mechanism (#1294)
Summary:
- Add a "tensor_dump_output_path" option to the compiler spec.
- Add an "output_" prefix to output tensors in the AOT phase. At runtime, output tensors are filled in based on their order in the context cache.

If tensor_dump_output_path is given, the delegate writes the output of each op there at runtime. Setting this option is not recommended in any case; it exists only for debugging accuracy issues.

Pull Request resolved: #1294

Reviewed By: kirklandsign

Differential Revision: D53947214

Pulled By: cccclai

fbshipit-source-id: 64cb2a0e998da87c66cc16249a54264ae3fcf046
1 parent 65f9701 commit 911094e

17 files changed: +186 -13 lines
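
For context, the new option surfaces as a field on the Python-side QnnExecuTorchOptions dataclass (see serialization/qnn_compile_spec_schema.py below). A minimal sketch of enabling it, assuming the module path implied by the file location, that the remaining fields have usable defaults, and that the options object is serialized into the compile spec by the backend's existing flow (not shown in this diff):

from executorch.backends.qualcomm.serialization.qnn_compile_spec_schema import (
    QnnExecuTorchOptions,
)

# Debug-only knob added by this commit: when non-empty, the delegate dumps
# every op's output under "<path>/Result/" at execution time.
options = QnnExecuTorchOptions()
options.tensor_dump_output_path = "/data/local/tmp/qnn_debug"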

backends/qualcomm/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
@@ -139,6 +139,7 @@ add_library(qnn_backend STATIC)
 add_library(qnn_factory STATIC)
 add_library(qnn_header INTERFACE)
 add_library(wrappers STATIC)
+add_library(utils STATIC)
 
 #
 # declare dependency
@@ -228,6 +229,7 @@ target_link_libraries(qnn_manager
   qnn_factory
   wrappers
   qnn_schema
+  utils
 )
 target_link_libraries(qnn_executorch_backend
   PRIVATE
@@ -237,6 +239,10 @@ target_link_libraries(qnn_executorch_backend
   executorch
   qcir_utils
 )
+target_link_libraries(utils
+  PRIVATE
+  qnn_executorch_logging
+)
 
 #
 # add linker option

backends/qualcomm/aot/python/PyQnnManagerAdaptor.cpp

Lines changed: 2 additions & 1 deletion
@@ -30,7 +30,8 @@ PYBIND11_MODULE(PyQnnManagerAdaptor, m) {
       .def("IsNodeSupportedByBackend", &PyQnnManager::IsNodeSupportedByBackend)
       .def("Compile", &PyQnnManager::Compile)
       .def("Destroy", &PyQnnManager::Destroy)
-      .def("IsAvailable", &PyQnnManager::IsAvailable);
+      .def("IsAvailable", &PyQnnManager::IsAvailable)
+      .def("IsTensorDump", &PyQnnManager::IsTensorDump);
 }
 } // namespace qnn
 } // namespace executor

backends/qualcomm/aot/python/PyQnnManagerAdaptor.h

Lines changed: 4 additions & 0 deletions
@@ -137,6 +137,10 @@ class PyQnnManager {
     return qnn_manager_->IsAvailable();
   }
 
+  bool IsTensorDump() {
+    return qnn_manager_->IsTensorDump();
+  }
+
  private:
   // Store the bytes object instead of a raw pointer so that this module will
   // keep the bytes alive.

backends/qualcomm/aot/wrappers/TensorWrapper.cpp

Lines changed: 13 additions & 0 deletions
@@ -121,6 +121,19 @@ Error TensorWrapper::FillDataBuffer(const void* data, bool copy_data) {
   return Error::Ok;
 }
 
+Error TensorWrapper::AllocateDataBuffer() {
+  char* static_data_buffer = new (std::nothrow) char[bytes_]; // NOLINT
+  if (static_data_buffer == nullptr) {
+    return Error::Internal;
+  }
+  owned_data_ = std::unique_ptr<char[]>(static_data_buffer);
+  QNN_VER_PTR(tensor_)->memType = QNN_TENSORMEMTYPE_RAW;
+  QNN_VER_PTR(tensor_)->clientBuf.dataSize = bytes_;
+  QNN_VER_PTR(tensor_)->clientBuf.data = owned_data_.get();
+
+  return Error::Ok;
+}
+
 void TensorWrapper::UpdateQnnTensorMeta(const Qnn_Tensor_t& tensor_src) {
   QNN_VER_PTR(tensor_)->id = QNN_VER_PTR(tensor_src)->id;
 }

backends/qualcomm/aot/wrappers/TensorWrapper.h

Lines changed: 2 additions & 0 deletions
@@ -35,6 +35,8 @@ class TensorWrapper {
 
   Error FillDataBuffer(const void* data, bool copy_data = false);
 
+  Error AllocateDataBuffer();
+
   // update qnn tensor meta
   // this function is used to recover metadata from QNN context binary.
   void UpdateQnnTensorMeta(const Qnn_Tensor_t& tensor_src);

backends/qualcomm/builders/node_visitor.py

Lines changed: 17 additions & 4 deletions
@@ -57,10 +57,14 @@ class NodeVisitor:
     """
 
     def __init__(
-        self, external_ids, edge_program: torch.export.ExportedProgram
+        self,
+        external_ids,
+        edge_program: torch.export.ExportedProgram,
+        enable_tensor_dump,
     ) -> None:
         self.external_ids = external_ids or {}
         self.edge_program = edge_program
+        self.enable_tensor_dump = enable_tensor_dump
 
     def get_tensor(self, input_node, op_node, idx=None):
         """
@@ -176,6 +180,9 @@ def get_tensor_type(
         if is_parameter(node, self.edge_program):
            return PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_STATIC
 
+        # dump all tensors, setting them to app-read
+        if self.enable_tensor_dump:
+            return PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_APP_READ
         return tensor_type
 
     def get_data_type(
@@ -250,13 +257,16 @@ def define_value(
 
         if node_name in nodes_to_wrappers:
             return nodes_to_wrappers[node_name]
+        tensor_name = node.name
+        if is_graph_output(node):
+            tensor_name = "output_" + tensor_name
         dims = [1] if len(tensor.size()) == 0 else tensor.size()
         tensor_type = self.get_tensor_type(node, tensor_type)
         quant_encoding, quant_configs = self.get_quant_encoding_conf(node)
         dtype = self.get_data_type(tensor, quant_configs, is_tensor)
         if isinstance(tensor, torch._subclasses.fake_tensor.FakeTensor):
             tensor_wrapper = PyQnnWrapper.TensorWrapper(
-                node_name,
+                tensor_name,
                 tensor_type,
                 dtype,
                 quant_encoding,
@@ -270,7 +280,7 @@ def define_value(
             if quant_configs:
                 tensor = self.get_quant_tensor_value(node, tensor, dtype)
             tensor_wrapper = PyQnnWrapper.TensorWrapper(
-                node_name,
+                tensor_name,
                 tensor_type,
                 dtype,
                 quant_encoding,
@@ -372,6 +382,7 @@ def generate_node_to_external_map(
 
 def get_node_visitors(
     edge_program: torch.export.ExportedProgram,
+    enable_tensor_dump=False,
 ) -> Dict[str, NodeVisitor]:
     """Create a new class instance at runtime, and put them in a dict"""
     node_to_external_map = generate_node_to_external_map(edge_program)
@@ -380,5 +391,7 @@ def get_node_visitors(
         assert callable(
             visitor
         ), f"Expecting a callable class, but got {visitor} of type {type(visitor)}"
-        node_visitors[target] = visitor(node_to_external_map, edge_program)
+        node_visitors[target] = visitor(
+            node_to_external_map, edge_program, enable_tensor_dump
+        )
     return node_visitors
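
A note on the mechanism above: QNN_TENSOR_TYPE_APP_READ is QNN's tensor type for data the backend writes and the application reads, so forcing non-parameter tensors to that type is what makes the backend materialize every op's output; the "output_" prefix then lets the runtime distinguish real graph outputs from these dump-only intermediates.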

backends/qualcomm/qnn_preprocess.py

Lines changed: 4 additions & 1 deletion
@@ -53,8 +53,11 @@ def preprocess(
         pass_result = qnn_compiler_passes(edge_program.graph_module)
         assert pass_result is not None
 
+        enable_tensor_dump = qnn_manager.IsTensorDump()
         nodes_to_wrappers = {}
-        node_visitors = get_node_visitors(edge_program)
+        node_visitors = get_node_visitors(
+            edge_program, enable_tensor_dump=enable_tensor_dump
+        )
         py_op_wrapper_list = []
         for node in pass_result.graph_module.graph.nodes:
             if node.op == "call_function":

backends/qualcomm/runtime/CMakeLists.txt

Lines changed: 8 additions & 0 deletions
@@ -39,3 +39,11 @@ target_sources(qnn_executorch_logging
   PRIVATE
   ${CMAKE_CURRENT_LIST_DIR}/Logging.cpp
 )
+
+# utils
+target_sources(utils
+  PUBLIC
+  ${CMAKE_CURRENT_LIST_DIR}/Utils.h
+  PRIVATE
+  ${CMAKE_CURRENT_LIST_DIR}/Utils.cpp
+)

backends/qualcomm/runtime/QnnExecuTorchBackend.cpp

Lines changed: 10 additions & 7 deletions
@@ -194,13 +194,16 @@ Error QnnExecuTorchBackend::execute(
     input_tensor_structs.push_back(input_tensors[i]->CloneTensorStruct());
   }
 
-  for (int i = input_tensors.size();
-       i < input_tensors.size() + output_tensors.size();
-       ++i) {
-    output_tensors[i - input_tensors.size()]->FillDataBuffer(
-        args[i]->toTensor().mutable_data_ptr(), false /* copy_data */);
-    output_tensor_structs.push_back(
-        output_tensors[i - input_tensors.size()]->CloneTensorStruct());
+  int output_index = input_tensors.size();
+  for (const auto& output_tensor : output_tensors) {
+    // pos=0 limits the search to the prefix
+    if (output_tensor->GetName().rfind("output_", 0) == 0) {
+      output_tensor->FillDataBuffer(
+          args[output_index]->toTensor().mutable_data_ptr(),
+          false /* copy_data */);
+      output_index++;
+    }
+    output_tensor_structs.push_back(output_tensor->CloneTensorStruct());
   }
 
   ET_CHECK_OR_RETURN_ERROR(
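
The rewritten loop depends only on the naming convention from the AOT phase: tensors whose names start with "output_" are real graph outputs and get bound to the caller's buffers in order, while the remaining intermediates keep the buffers allocated for dumping. A hypothetical host-side helper (illustrative names, not part of this commit) can reuse the same convention to sort the dumped files:

import os

def split_dumped_tensors(result_dir):
    # Partition dumped .raw files into graph outputs and intermediates,
    # using the "output_" prefix convention introduced by this commit.
    outputs, intermediates = [], []
    for name in sorted(os.listdir(result_dir)):
        (outputs if name.startswith("output_") else intermediates).append(name)
    return outputs, intermediates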

backends/qualcomm/runtime/QnnManager.cpp

Lines changed: 40 additions & 0 deletions
@@ -6,10 +6,12 @@
  * LICENSE file in the root directory of this source tree.
  */
 #include <executorch/backends/qualcomm/runtime/QnnManager.h>
+#include <executorch/backends/qualcomm/runtime/Utils.h>
 #include <executorch/backends/qualcomm/runtime/backends/QnnImplementation.h>
 
 #include <cstdlib>
 #include <cstring>
+#include <fstream>
 namespace torch {
 namespace executor {
 namespace qnn {
@@ -25,6 +27,7 @@ QnnManager::QnnManager(
     : backend_type_(options->backend_type()),
       library_path_(options->library_path()->c_str()),
       skel_library_dir_(options->skel_library_dir()->c_str()),
+      tensor_dump_output_path_(options->tensor_dump_output_path()->c_str()),
       graph_name_(options->graph_name()->c_str()),
       soc_info_(options->soc_info()),
       htp_options_(options->htp_options()),
@@ -41,6 +44,9 @@ QnnManager::QnnManager(
       "library_path: %s", options->library_path()->c_str());
   QNN_EXECUTORCH_LOG_INFO(
       "skel_library_dir: %s", options->skel_library_dir()->c_str());
+  QNN_EXECUTORCH_LOG_INFO(
+      "tensor_dump_output_path: %s",
+      options->tensor_dump_output_path()->c_str());
   QNN_EXECUTORCH_LOG_INFO(
       "log_level: %s", EnumNameQnnExecuTorchLogLevel(options->log_level()));
   QNN_EXECUTORCH_LOG_INFO(
@@ -144,6 +150,9 @@ Error QnnManager::AllocateTensor() {
   for (auto& tensor : output_tensors) {
     std::shared_ptr<TensorWrapper> tensor_wrapper = CreateTensorWrapper(tensor);
     tensor_wrapper->UpdateQnnTensorMeta(tensor);
+    if (!tensor_dump_output_path_.empty()) {
+      tensor_wrapper->AllocateDataBuffer();
+    }
     output_tensors_.emplace_back(std::move(tensor_wrapper));
   }
   return Error::Ok;
@@ -153,6 +162,11 @@ Error QnnManager::AllocateTensor(
     std::vector<std::shared_ptr<TensorWrapper>>& inputs,
     std::vector<std::shared_ptr<TensorWrapper>>& outputs) {
   input_tensors_ = std::move(inputs);
+  for (auto& output_tensor : outputs) {
+    if (!tensor_dump_output_path_.empty()) {
+      output_tensor->AllocateDataBuffer();
+    }
+  }
   output_tensors_ = std::move(outputs);
   return Error::Ok;
 }
@@ -171,6 +185,32 @@ Error QnnManager::Execute(
     return Error::Internal;
   }
 
+  if (!tensor_dump_output_path_.empty()) {
+    // TODO: Need to handle the graph which is partitioned.
+    // Maybe we could use graph name.
+    std::string dir = tensor_dump_output_path_ + "/Result/";
+    CreateDirectory(dir);
+    QNN_EXECUTORCH_LOG_INFO("Dump tensor to the path: %s", dir.c_str());
+    for (std::size_t out_idx = 0; out_idx < output_tensor_structs.size();
+         ++out_idx) {
+      const Qnn_Tensor_t& output_tensor = output_tensor_structs[out_idx];
+
+      std::string output_path =
+          dir + QNN_VER_PTR(output_tensor)->name + "_tensor.raw";
+
+      std::ofstream fout(output_path, std::ios::binary);
+      if (fout.fail()) {
+        QNN_EXECUTORCH_LOG_ERROR(
+            "Dump tensor name: %s Failed.", QNN_VER_PTR(output_tensor)->name);
+        return Error::Internal;
+      }
+
+      fout.write(
+          static_cast<const char*>(QNN_VER_PTR(output_tensor)->clientBuf.data),
+          QNN_VER_PTR(output_tensor)->clientBuf.dataSize);
+    }
+  }
+
   return Error::Ok;
 }
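
Each .raw file written above is a bare byte dump of the tensor's clientBuf; neither dtype nor shape is stored in the file. A sketch for inspecting a dump offline, where the path, dtype, and shape are placeholders the reader must supply from the model:

import numpy as np

# The file holds raw bytes only, so element type and shape must come from
# the model definition, not from the dump itself.
raw = np.fromfile(
    "/data/local/tmp/qnn_debug/Result/output_x_tensor.raw", dtype=np.uint8
)
# Example reinterpretation once the real dtype/shape are known:
# tensor = raw.view(np.float32).reshape(1, 3, 224, 224)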

backends/qualcomm/runtime/QnnManager.h

Lines changed: 5 additions & 0 deletions
@@ -42,6 +42,10 @@ class QnnManager {
 
   bool IsAvailable();
 
+  bool IsTensorDump() {
+    return !tensor_dump_output_path_.empty();
+  }
+
   bool IsOnlinePrepare();
 
   bool IsNodeSupportedByBackend(
@@ -68,6 +72,7 @@ class QnnManager {
   QnnExecuTorchBackendType backend_type_;
   std::string library_path_;
   std::string skel_library_dir_;
+  std::string tensor_dump_output_path_;
   std::string graph_name_;
   const SocInfo* soc_info_;
   const QnnExecuTorchHtpBackendOptions* htp_options_;

backends/qualcomm/runtime/Utils.cpp

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#include <executorch/backends/qualcomm/runtime/Logging.h>
+#include <executorch/backends/qualcomm/runtime/Utils.h>
+#include <sys/stat.h>
+namespace torch {
+namespace executor {
+namespace qnn {
+
+void CreateDirectory(const std::string& path) {
+  // Create any recursive directory
+  if (path.empty()) {
+    QNN_EXECUTORCH_LOG_ERROR("Create folder shouldn't be empty");
+    return;
+  }
+  std::size_t pos = path.find_last_of('/');
+  std::string subdir = (std::string::npos == pos) ? "" : path.substr(0, pos);
+  if (subdir.empty() || subdir == "." || subdir == "..") {
+    return;
+  }
+  CreateDirectory(subdir);
+  int mkdir_err = mkdir(subdir.c_str(), S_IRWXU | S_IRWXG | S_IRWXO);
+  if (mkdir_err != 0 && errno != EEXIST) {
+    std::string err_msg = "Failed to create " + subdir + " folder\n";
+    QNN_EXECUTORCH_LOG_ERROR(err_msg.c_str());
+  }
+}
+
+} // namespace qnn
+} // namespace executor
+} // namespace torch
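
Worth noting: CreateDirectory strips everything after the last '/' and recursively creates the remaining prefix, so the leaf directory itself is created only because QnnManager passes a path with a trailing slash (tensor_dump_output_path_ + "/Result/"). For intuition, the net effect is roughly this Python equivalent:

import os

# Rough analogue of CreateDirectory("/data/local/tmp/qnn_debug/Result/"):
# create the directory plus any missing parents, tolerating EEXIST.
os.makedirs("/data/local/tmp/qnn_debug/Result/", exist_ok=True)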

backends/qualcomm/runtime/Utils.h

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) Qualcomm Innovation Center, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#pragma once
+
+#include <string>
+
+namespace torch {
+namespace executor {
+namespace qnn {
+// Create Directory
+void CreateDirectory(const std::string& path);
+
+} // namespace qnn
+} // namespace executor
+} // namespace torch

backends/qualcomm/serialization/qnn_compile_spec_schema.py

Lines changed: 1 addition & 0 deletions
@@ -114,3 +114,4 @@ class QnnExecuTorchOptions:
     htp_options: QnnExecuTorchHtpBackendOptions = QnnExecuTorchHtpBackendOptions()
     soc_info: SocInfo = SocInfo()
     online_prepare: bool = False
+    tensor_dump_output_path: str = ""

backends/qualcomm/serialization/schema.fbs

Lines changed: 6 additions & 0 deletions
@@ -140,6 +140,12 @@ table QnnExecuTorchOptions {
 
   /// Check if on-device graph construction. Default is false.
   online_prepare:bool;
+
+  /// Tensor dump output path. If a path is given, the delegate writes
+  /// the output of each op there at runtime.
+  /// Setting this option is not recommended in any case; it exists
+  /// only for debugging accuracy issues.
+  tensor_dump_output_path:string;
 }
 
 root_type QnnExecuTorchOptions;
