6
6
* LICENSE file in the root directory of this source tree.
7
7
*/
8
8
#pragma once
9
- #include < executorch/backends/qualcomm/aot/ir/qcir_utils.h>
10
9
#include < executorch/backends/qualcomm/aot/python/PyQnnWrapperAdaptor.h>
11
10
#include < executorch/backends/qualcomm/qc_compiler_spec_generated.h>
12
11
#include < executorch/backends/qualcomm/runtime/Logging.h>
@@ -50,119 +49,6 @@ class PyQnnManager {
50
49
qnn_executorch_options, qnn_executorch_context_binary_);
51
50
}
52
51
53
- // used during stage 2 of multi-graph mode
54
- explicit PyQnnManager (const py::bytes& buffer, const py::list& qcirs)
55
- : qnn_executorch_option_ptr_(buffer) {
56
- auto qnn_executorch_options = GetQnnExecuTorchOptions (
57
- qnn_executorch_option_ptr_.cast <std::string_view>().data ());
58
-
59
- // merge multiple qcirs into one context with multiple graphs
60
-
61
- // We start retrieving tensor from offsets = 0.
62
- std::vector<uint32_t > offsets (1 , 0 );
63
- std::vector<uint8_t > tensor_data;
64
- std::vector<uint8_t *> tensor_ptr;
65
- std::vector<uint64_t > tensor_size;
66
- uint64_t total_tensor_size = 0 ;
67
- for (size_t i = 0 ; i < qcirs.size (); ++i) {
68
- py::buffer_info info (py::buffer (qcirs[i].cast <py::bytes>()).request ());
69
-
70
- uint8_t * qcir_custom_buffer_ptr = static_cast <uint8_t *>(info.ptr );
71
- QnnQcirCustomProtocol qnn_qcir_custom_protocol;
72
- auto [status, _, qcir_tensor_size, __, qcir_tensor_ptr] =
73
- qnn_qcir_custom_protocol.DeserializeQcirCustomBuffer (
74
- qcir_custom_buffer_ptr);
75
-
76
- if (status != Error::Ok) {
77
- QNN_EXECUTORCH_LOG_ERROR (" Fail to verify QnnQcirCustomProtocol" );
78
- return ;
79
- }
80
-
81
- tensor_ptr.push_back (static_cast <uint8_t *>(qcir_tensor_ptr));
82
- tensor_size.push_back (qcir_tensor_size);
83
- total_tensor_size += qcir_tensor_size;
84
- offsets.push_back (offsets.back () + qcir_tensor_size);
85
- }
86
-
87
- tensor_data.resize (total_tensor_size);
88
-
89
- // store multiple graphs tensor in a contiguous memory space
90
- for (size_t i = 0 ; i < tensor_ptr.size (); ++i) {
91
- std::memcpy (
92
- tensor_data.data () + offsets[i], tensor_ptr[i], tensor_size[i]);
93
- }
94
-
95
- std::vector<flatbuffers::Offset<qcir::Graph>> graphs;
96
- for (size_t i = 0 ; i < qcirs.size (); ++i) {
97
- py::buffer_info info (py::buffer (qcirs[i].cast <py::bytes>()).request ());
98
-
99
- uint8_t * qcir_custom_buffer_ptr = static_cast <uint8_t *>(info.ptr );
100
- QnnQcirCustomProtocol qnn_qcir_custom_protocol;
101
- auto [status, qcir_fbs_size, _, qcir_fbs_ptr, __] =
102
- qnn_qcir_custom_protocol.DeserializeQcirCustomBuffer (
103
- qcir_custom_buffer_ptr);
104
-
105
- if (status != Error::Ok) {
106
- QNN_EXECUTORCH_LOG_ERROR (" Fail to verify QnnQcirCustomProtocol" );
107
- return ;
108
- }
109
-
110
- auto context = qcir::GetContext (qcir_fbs_ptr);
111
- for (const auto & graph : *context->graphs ()) {
112
- std::vector<flatbuffers::Offset<qcir::Tensor>> tensors;
113
- for (const auto tensor : *graph->tensors ()) {
114
- // here we need to take a detour to merge multiple qcir flatbuffers
115
- // outer ToTensor
116
- // return: flatbuffers::Offset<Tensor>
117
- // consume: QnnTensor, data_offset, flatbuffers::FlatBufferBuilder*
118
- // inner ToTensor
119
- // return: QnnTensor
120
- // consume:
121
- // flatbuffers::Vector<::flatbuffers::Offset<qcir::Tensor>>,
122
- // data_ptr
123
- tensors.emplace_back (ToTensor (
124
- ToTensor (tensor, nullptr ),
125
- offsets[i] + tensor->offset (),
126
- &builder_));
127
- }
128
- std::vector<flatbuffers::Offset<qcir::Operator>> nodes;
129
- for (const auto & node : *graph->nodes ()) {
130
- uint32_t * inputs_ptr = const_cast <uint32_t *>(node->inputs ()->data ());
131
- uint32_t * outputs_ptr =
132
- const_cast <uint32_t *>(node->outputs ()->data ());
133
- uint32_t * params_ptr = const_cast <uint32_t *>(node->params ()->data ());
134
- std::vector<uint32_t > inputs (
135
- inputs_ptr, inputs_ptr + node->inputs ()->size ());
136
- std::vector<uint32_t > outputs (
137
- outputs_ptr, outputs_ptr + node->outputs ()->size ());
138
- std::vector<uint32_t > params (
139
- params_ptr, params_ptr + node->params ()->size ());
140
- nodes.emplace_back (qcir::CreateOperatorDirect (
141
- builder_,
142
- node->name ()->str ().c_str (),
143
- node->package_name ()->str ().c_str (),
144
- node->type_name ()->str ().c_str (),
145
- &inputs,
146
- &outputs,
147
- &params));
148
- }
149
- graphs.emplace_back (qcir::CreateGraphDirect (
150
- builder_, graph->name ()->str ().c_str (), &nodes, &tensors));
151
- }
152
- }
153
-
154
- auto context = qcir::CreateContextDirect (builder_, &graphs);
155
- builder_.Finish (context);
156
- QnnExecuTorchContextBinary qcir_bin (
157
- {builder_.GetBufferPointer (), builder_.GetSize ()});
158
-
159
- // Init QnnQcirCustomProtocol binary
160
- qnn_executorch_context_binary_ =
161
- MakeQcirCustomBinaryInfo (qcir_bin, tensor_data);
162
- qnn_manager_ = std::make_shared<QnnManager>(
163
- qnn_executorch_options, qnn_executorch_context_binary_);
164
- }
165
-
166
52
executorch::runtime::Error Init () {
167
53
return qnn_manager_->Init ();
168
54
}
@@ -172,146 +58,24 @@ class PyQnnManager {
172
58
return qnn_manager_->IsNodeSupportedByBackend (op_wrappers);
173
59
}
174
60
175
- // this method is specific for stage 2 of compiling multi-graphs
176
- py::array_t <char > Compile () {
177
- if (qnn_manager_->CompileQcir () != Error::Ok) {
178
- QNN_EXECUTORCH_LOG_ERROR (" Fail to compile qcir" );
179
- return py::array_t <char >(0 );
180
- }
181
-
182
- // generate context binary if compilation succeded
183
- QnnExecuTorchContextBinary binary_info;
184
- qnn_manager_->GetContextBinary (binary_info);
185
- // allocate py::array (to pass the result of the C++ function to Python)
186
- auto result = py::array_t <char >(binary_info.nbytes );
187
- auto result_buffer = result.request ();
188
- char * result_ptr = (char *)result_buffer.ptr ;
189
- std::memcpy (result_ptr, binary_info.buffer , binary_info.nbytes );
190
- return result;
191
- }
192
-
193
61
py::array_t <char > Compile (
194
- const std::string& graph_name ,
195
- std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
62
+ const std::vector<std:: string>& graph_names ,
63
+ std::vector<std::vector<std:: shared_ptr<OpWrapper> >>& op_wrappers) {
196
64
QnnExecuTorchContextBinary binary_info;
197
65
198
- if (qnn_manager_->IsMultipleGraphs ()) {
199
- builder_.Reset ();
200
- std::vector<uint8_t > tensor_data;
201
- std::vector<uint64_t > offsets;
202
- std::unordered_map<void *, int > tensor_map;
203
- std::vector<flatbuffers::Offset<qcir::Tensor>> fb_tensors;
204
- std::vector<flatbuffers::Offset<qcir::Operator>> fb_ops;
205
-
206
- auto set_tensor = [&](const std::shared_ptr<TensorWrapper>& wrapper,
207
- std::vector<uint32_t >& index ) {
208
- auto it = tensor_map.find (wrapper.get ());
209
- if (it != tensor_map.end ()) {
210
- index .push_back (it->second );
211
- } else {
212
- tensor_map[wrapper.get ()] = fb_tensors.size ();
213
- index .push_back (fb_tensors.size ());
214
- offsets.push_back (tensor_data.size ());
215
- Qnn_Tensor_t qnn_tensor = wrapper->CloneTensorStruct ();
216
- fb_tensors.emplace_back (
217
- ToTensor (qnn_tensor, offsets.back (), &builder_));
218
- uint8_t * data_ptr = static_cast <uint8_t *>(
219
- QNN_TENSOR_VER_PTR (qnn_tensor)->clientBuf .data );
220
- if (data_ptr != nullptr ) {
221
- tensor_data.insert (
222
- tensor_data.end (),
223
- data_ptr,
224
- data_ptr + QNN_TENSOR_VER_PTR (qnn_tensor)->clientBuf .dataSize );
225
- }
226
- }
227
- };
228
-
229
- for (std::shared_ptr<OpWrapper>& op_wrapper : op_wrappers) {
230
- std::vector<uint32_t > inputs, outputs, params;
231
-
232
- for (const auto & tensor_wrapper : op_wrapper->GetInputTensors ()) {
233
- set_tensor (tensor_wrapper, inputs);
234
- }
235
-
236
- for (const auto & tensor_wrapper : op_wrapper->GetOutputTensors ()) {
237
- set_tensor (tensor_wrapper, outputs);
238
- }
239
-
240
- for (const auto & param : op_wrapper->GetParams ()) {
241
- auto * p_tensor_param = dynamic_cast <TensorParamWrapper*>(param.get ());
242
- if (p_tensor_param != nullptr ) {
243
- auto wrapper = p_tensor_param->GetTensorWrapper ();
244
- wrapper->SetName (param->GetName ());
245
- set_tensor (wrapper, params);
246
- } else {
247
- executorch::runtime::Error err = param->PopulateQnnParam ();
248
- if (err != executorch::runtime::Error::Ok) {
249
- QNN_EXECUTORCH_LOG_ERROR (
250
- " Fail to get scalar parameter in online prepare stage" );
251
- return py::array_t <char >(0 );
252
- }
253
- Qnn_Param_t p = param->GetQnnParam ();
254
- Qnn_Tensor_t t (
255
- {.version = QNN_TENSOR_VERSION_2, .v2 = QNN_TENSOR_V2_INIT});
256
- QNN_TENSOR_VER_PTR (t)->name = p.name ;
257
- QNN_TENSOR_VER_PTR (t)->dataType = p.scalarParam .dataType ;
258
- QNN_TENSOR_VER_PTR (t)->clientBuf .data =
259
- static_cast <void *>(&p.scalarParam .uint8Value );
260
- QNN_TENSOR_VER_PTR (t)->clientBuf .dataSize =
261
- GetDataTypeSize (QNN_TENSOR_VER_PTR (t)->dataType );
262
-
263
- // collect tensor data
264
- offsets.push_back (tensor_data.size ());
265
- const uint8_t * data_ptr =
266
- static_cast <uint8_t *>(QNN_TENSOR_VER_PTR (t)->clientBuf .data );
267
- tensor_data.insert (
268
- tensor_data.end (),
269
- data_ptr,
270
- data_ptr + QNN_TENSOR_VER_PTR (t)->clientBuf .dataSize );
271
- params.push_back (fb_tensors.size ());
272
- fb_tensors.emplace_back (ToTensor (t, offsets.back (), &builder_));
273
- }
274
- }
275
-
276
- Qnn_OpConfig_t op_config = op_wrapper->GetOpConfig ();
277
- fb_ops.emplace_back (qcir::CreateOperatorDirect (
278
- builder_,
279
- QNN_OP_VER_PTR (op_config)->name ,
280
- QNN_OP_VER_PTR (op_config)->packageName ,
281
- QNN_OP_VER_PTR (op_config)->typeName ,
282
- &inputs,
283
- &outputs,
284
- &params));
285
- }
286
-
287
- std::vector<flatbuffers::Offset<qcir::Graph>> fb_graphs (
288
- {qcir::CreateGraphDirect (
289
- builder_, graph_name.c_str (), &fb_ops, &fb_tensors)});
290
- auto context = qcir::CreateContextDirect (builder_, &fb_graphs);
291
- builder_.Finish (context);
292
-
293
- QnnExecuTorchContextBinary qcir_binary (
294
- {builder_.GetBufferPointer (), builder_.GetSize ()});
295
-
296
- custom_qcir_protocol_buffer_ =
297
- QnnQcirCustomProtocol (qcir_binary.nbytes , tensor_data.size ());
298
- custom_qcir_protocol_buffer_.BuildQcirCustomBuffer (
299
- qcir_binary, tensor_data);
300
- std::tie (binary_info.buffer , binary_info.nbytes ) =
301
- custom_qcir_protocol_buffer_.GetCustomProtocolBuffer ();
302
- } else {
303
- if (qnn_manager_->Compile (graph_name, op_wrappers) !=
66
+ for (int i = 0 ; i < graph_names.size (); ++i) {
67
+ if (qnn_manager_->Compile (graph_names[i], op_wrappers[i]) !=
304
68
executorch::runtime::Error::Ok) {
305
69
QNN_EXECUTORCH_LOG_ERROR (" Fail to compile QNN graph" );
306
70
return py::array_t <char >(0 );
307
71
}
308
- auto qnn_executorch_options = GetQnnExecuTorchOptions (
309
- qnn_executorch_option_ptr_. cast <std::string_view>(). data ());
310
- if (qnn_executorch_options-> saver () ||
311
- qnn_manager_-> GetContextBinary (binary_info) !=
312
- executorch::runtime::Error::Ok) {
313
- return py:: array_t < char >( 0 );
314
- }
72
+ }
73
+ auto qnn_executorch_options = GetQnnExecuTorchOptions (
74
+ qnn_executorch_option_ptr_. cast <std::string_view>(). data ());
75
+ if (qnn_executorch_options-> saver () ||
76
+ qnn_manager_-> GetContextBinary (binary_info) !=
77
+ executorch::runtime::Error::Ok) {
78
+ return py:: array_t < char >( 0 );
315
79
}
316
80
317
81
// allocate py::array (to pass the result of the C++ function to Python)
0 commit comments