Skip to content

Commit 4746119

Browse files
authored
Merge branch 'main' into gh/trivedivivek/90/orig
2 parents 83ad3d3 + 18e5eca commit 4746119

37 files changed

+2136
-2369
lines changed

backends/qualcomm/runtime/SharedBuffer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ std::size_t std::hash<CustomMemTensorInfo>::operator()(
2222
hash_val ^= std::hash<size_t>()(info.pos);
2323
hash_val ^= std::hash<size_t>()(info.tensor_bytes);
2424
for (int i = 0; i < info.rank; ++i) {
25-
hash_val ^= info.shape[i];
25+
hash_val ^= std::hash<uint32_t>()(info.shape[i]);
2626
}
2727
hash_val ^= std::hash<uint32_t>()(info.rank);
2828
hash_val ^= std::hash<executorch::aten::ScalarType>()(info.dtype);

backends/qualcomm/runtime/backends/QnnBackendFactory.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,9 @@ std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(
8080
options->soc_info(),
8181
htp_options);
8282
backend_params->qnn_mem_manager_ptr_ = std::make_unique<QnnMemManager>(
83-
implementation, backend_params->qnn_context_ptr_.get());
83+
implementation,
84+
backend_params->qnn_context_ptr_.get(),
85+
options->log_level());
8486
backend_params->backend_init_state_ = BackendInitializeState::INITIALIZED;
8587
} break;
8688
case QnnExecuTorchBackendType::kGpuBackend:

backends/qualcomm/runtime/backends/QnnMemManager.cpp

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,12 @@ Error QnnMemManager::RegisterIonMem(
4747
}
4848
tensor_wrapper->SetMemHandle(handle);
4949
registered_map_.insert({handle, mem_ptr});
50-
QNN_EXECUTORCH_LOG_INFO(
51-
"Tensor %s is successfully registered to ION shared memory.",
52-
tensor_wrapper->GetName().c_str());
50+
if (log_level_ >= QnnExecuTorchLogLevel::kLogLevelInfo) {
51+
QNN_EXECUTORCH_LOG_INFO(
52+
"Tensor %s is successfully registered to ION shared memory.",
53+
tensor_wrapper->GetName().c_str());
54+
}
55+
5356
return Error::Ok;
5457
}
5558

@@ -92,9 +95,11 @@ Error QnnMemManager::RegisterCustomMem(
9295
}
9396
tensor_wrapper->SetMemHandle(handle);
9497
registered_map_.insert({handle, mem_ptr});
95-
QNN_EXECUTORCH_LOG_INFO(
96-
"Tensor %s is successfully registered to custom shared memory.",
97-
tensor_wrapper->GetName().c_str());
98+
if (log_level_ >= QnnExecuTorchLogLevel::kLogLevelInfo) {
99+
QNN_EXECUTORCH_LOG_INFO(
100+
"Tensor %s is successfully registered to custom shared memory.",
101+
tensor_wrapper->GetName().c_str());
102+
}
98103
return Error::Ok;
99104
}
100105

backends/qualcomm/runtime/backends/QnnMemManager.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,11 @@ class QnnMemManager {
2121
public:
2222
explicit QnnMemManager(
2323
const QnnImplementation& implementation,
24-
QnnContext* context)
25-
: implementation_(implementation), context_(context) {}
24+
QnnContext* context,
25+
QnnExecuTorchLogLevel log_level)
26+
: implementation_(implementation),
27+
context_(context),
28+
log_level_(log_level) {}
2629
~QnnMemManager() {
2730
DeRegisterMem();
2831
}
@@ -63,6 +66,7 @@ class QnnMemManager {
6366

6467
const QnnImplementation& implementation_;
6568
QnnContext* context_;
69+
QnnExecuTorchLogLevel log_level_;
6670
std::unordered_map<Qnn_MemHandle_t, void*> registered_map_;
6771
std::unordered_map<CustomMemTensorInfo, void*> pre_registered_handles_;
6872
std::unordered_map<executorch::aten::ScalarType, Qnn_DataType_t>

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3681,7 +3681,7 @@ def test_llama3_2_1b(self):
36813681
if self.pre_gen_pte:
36823682
cmds.extend(["--pre_gen_pte", self.pre_gen_pte])
36833683

3684-
golden_start_with = "<|begin_of_text|><|start_header_id|>user<|end_header_id|>"
3684+
golden_start_with = "<|start_header_id|>user<|end_header_id|>"
36853685
p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL)
36863686
with Listener((self.ip, self.port)) as listener:
36873687
conn = listener.accept()

backends/xnnpack/operators/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
op_dynamic_quantize_ops,
2121
op_elu,
2222
op_floor,
23+
op_gelu,
2324
op_hardswish,
2425
op_hardtanh,
2526
op_leaky_relu,

backends/xnnpack/operators/op_gelu.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
from typing import Dict
8+
9+
import torch
10+
from executorch.backends.xnnpack.operators.node_visitor import (
11+
NodeVisitor,
12+
register_node_visitor,
13+
)
14+
from executorch.backends.xnnpack.serialization.xnnpack_graph_schema import (
15+
XNNGelu,
16+
XNNGraph,
17+
XNode,
18+
)
19+
from executorch.backends.xnnpack.utils.utils import get_input_node
20+
21+
22+
@register_node_visitor
23+
class GeluVisitor(NodeVisitor):
24+
target = "aten.gelu.default"
25+
26+
def __init__(self, *args) -> None:
27+
super().__init__(*args)
28+
29+
def define_node(
30+
self,
31+
node: torch.fx.Node,
32+
xnn_graph: XNNGraph,
33+
vals_to_ids: Dict[torch.fx.Node, int],
34+
debug_handle: int,
35+
) -> None:
36+
self.define_nodes_tensor_inputs_outputs(node, xnn_graph, vals_to_ids)
37+
38+
# input
39+
input_id = vals_to_ids[get_input_node(node, 0)]
40+
41+
# output
42+
output_id = vals_to_ids[node]
43+
44+
ser_node = XNode(
45+
xnode_union=XNNGelu(
46+
input_id=input_id,
47+
output_id=output_id,
48+
flags=0,
49+
),
50+
debug_handle=debug_handle,
51+
)
52+
xnn_graph.xnodes.append(ser_node)

backends/xnnpack/partition/config/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
DeQuantizedPerTensorConfig,
2727
DivConfig,
2828
FloorConfig,
29+
GeluConfig,
2930
HardswishConfig,
3031
# EluConfig,
3132
HardtanhConfig,
@@ -79,6 +80,7 @@
7980
DivConfig,
8081
# EluConfig, # Waiting for PyTorch Pin Update
8182
FloorConfig,
83+
GeluConfig,
8284
HardtanhConfig,
8385
HardswishConfig,
8486
LeakyReLUConfig,

backends/xnnpack/partition/config/generic_node_configs.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,13 @@ def supported_precision_types(self) -> List[ConfigPrecisionType]:
343343
return [ConfigPrecisionType.FP32]
344344

345345

346+
class GeluConfig(GenericNodePartitionerConfig):
347+
target_name = "gelu.default"
348+
349+
def supported_precision_types(self) -> List[ConfigPrecisionType]:
350+
return [ConfigPrecisionType.FP32]
351+
352+
346353
class HardswishConfig(GenericNodePartitionerConfig):
347354
target_name = "hardswish.default"
348355

backends/xnnpack/partition/configs.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565
exir_ops.edge.aten.addmm.default, # TODO(T163877189) add constraint for addmm
6666
exir_ops.edge.aten.rsqrt.default,
6767
exir_ops.edge.aten.log.default,
68+
exir_ops.edge.aten.gelu.default,
6869
]
6970

7071
SUPPORTED_MODULES = [

backends/xnnpack/runtime/XNNCompiler.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1448,6 +1448,36 @@ Error defineLogNode(
14481448
return Error::Ok;
14491449
}
14501450

1451+
/*
1452+
Define serialized gelu node into the subgraph, using the remapped ids
1453+
to map the serialized ids, to the new ids generated when defining the
1454+
tensor value
1455+
*/
1456+
Error defineGeluNode(
1457+
xnn_subgraph_t subgraph_ptr,
1458+
const std::unordered_map<uint32_t, uint32_t>& remapped_ids,
1459+
const NodePtr node,
1460+
const fb_xnnpack::XNNGraph* graph) noexcept {
1461+
MAYBE_UNUSED(graph);
1462+
1463+
auto graph_node = node->xnode_union_as_XNNGelu();
1464+
1465+
xnn_status status = xnn_define_gelu(
1466+
subgraph_ptr,
1467+
remapped_ids.at(graph_node->input_id()),
1468+
remapped_ids.at(graph_node->output_id()),
1469+
graph_node->flags());
1470+
1471+
ET_CHECK_OR_RETURN_ERROR(
1472+
status == xnn_status_success,
1473+
Internal,
1474+
"Failed to create gelu node %i with code: %s",
1475+
node->debug_handle(),
1476+
xnn_status_to_string(status));
1477+
1478+
return Error::Ok;
1479+
}
1480+
14511481
/*
14521482
Define serialized ceiling node into the subgraph, using the remapped ids
14531483
to map the serialized ids, to the new ids generated when defining the
@@ -2009,6 +2039,7 @@ DefineNodeFunc getDefineNodeFunc(fb_xnnpack::XNodeUnion nodeType) {
20092039
_DEFINE(SquareRoot)
20102040
_DEFINE(ReciprocalSquareRoot)
20112041
_DEFINE(Ceiling)
2042+
_DEFINE(Gelu)
20122043
_DEFINE(Hardswish)
20132044
_DEFINE(LeakyReLU)
20142045
_DEFINE(Log)

backends/xnnpack/serialization/runtime_schema.fbs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ union XNodeUnion {
140140
XNNConvTranspose2d: _XNNNodeConv,
141141
XNNReciprocalSquareRoot: _XNNNode1x1,
142142
XNNLog: _XNNNode1x1,
143+
XNNGelu: _XNNNode1x1,
143144
}
144145

145146
union XValueUnion {

backends/xnnpack/serialization/schema.fbs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ union XNodeUnion {
136136
XNNConvTranspose2d: _XNNNodeConv,
137137
XNNReciprocalSquareRoot: _XNNNode1x1,
138138
XNNLog: _XNNNode1x1,
139+
XNNGelu: _XNNNode1x1,
139140
}
140141

141142
union XValueUnion {

backends/xnnpack/serialization/xnnpack_graph_schema.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,11 @@ class XNNCeiling(XNNNode1x1):
291291
pass
292292

293293

294+
@dataclass
295+
class XNNGelu(XNNNode1x1):
296+
pass
297+
298+
294299
@dataclass
295300
class XNNHardswish(XNNNode1x1):
296301
pass
@@ -385,6 +390,7 @@ class XNNScaledDotProductAttention:
385390
XNNBatchMatrixMultiply,
386391
XNNReciprocalSquareRoot,
387392
XNNLog,
393+
XNNGelu,
388394
]
389395

390396

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
import unittest
8+
9+
import torch
10+
from executorch.backends.xnnpack.test.tester import Tester
11+
12+
13+
class TestGelu(unittest.TestCase):
14+
def setUp(self):
15+
torch._dynamo.reset()
16+
17+
class Gelu(torch.nn.Module):
18+
def __init__(self):
19+
super().__init__()
20+
self.gelu = torch.nn.GELU()
21+
22+
def forward(self, x):
23+
return self.gelu(x)
24+
25+
def run_gelu_test(self, inputs):
26+
(
27+
Tester(self.Gelu(), inputs)
28+
.export()
29+
.check_count({"torch.ops.aten.gelu.default": 1})
30+
.to_edge_transform_and_lower()
31+
.check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
32+
.check_not(["executorch_exir_dialects_edge__ops_aten_gelu_default"])
33+
.to_executorch()
34+
.serialize()
35+
.run_method_and_compare_outputs()
36+
)
37+
38+
def test_fp16_gelu(self):
39+
inputs = (torch.randn(20).to(torch.float16),)
40+
self.run_gelu_test(inputs)
41+
42+
def test_fp32_gelu(self):
43+
inputs = (torch.randn(20),)
44+
self.run_gelu_test(inputs)

docs/source/using-executorch-ios.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ sudo /Applications/CMake.app/Contents/bin/cmake-gui --install
135135
For example, the following command will build the ExecuTorch Runtime along with all available kernels and backends for the Apple platform in both Release and Debug modes:
136136

137137
```bash
138-
./scripts/build_apple_frameworks.sh --Release --Debug --coreml --mps --xnnpack --custom --optimized --portable --quantized
138+
./scripts/build_apple_frameworks.sh
139139
```
140140

141141
After the build finishes successfully, the resulting frameworks can be found in the `cmake-out` directory.

examples/demo-apps/apple_ios/LLaMA/docs/delegates/xnnpack_README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ curl -LO "https://github.com/facebook/buck2/releases/download/${BUCK2_RELEASE_DA
138138
zstd -cdq "$BUCK2_ARCHIVE" > "$BUCK2" && chmod +x "$BUCK2"
139139
rm "$BUCK2_ARCHIVE"
140140
141-
./scripts/build_apple_frameworks.sh --buck2="$(realpath $BUCK2)" --coreml --custom --mps --optimized --portable --quantized --xnnpack
141+
./scripts/build_apple_frameworks.sh
142142
```
143143

144144
After the build finishes successfully, the resulting frameworks can be found in the `cmake-out` directory. Copy them to your project and link them against your targets.

examples/qualcomm/oss_scripts/llama/CMakeLists.txt

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,18 @@ list(
2828
${CMAKE_CURRENT_LIST_DIR}/qnn_llama_runner.cpp
2929
${CMAKE_CURRENT_LIST_DIR}/runner/runner.cpp
3030
${CMAKE_CURRENT_LIST_DIR}/runner/runner.h
31-
${CMAKE_CURRENT_LIST_DIR}/runner/io_manager.cpp
32-
${CMAKE_CURRENT_LIST_DIR}/runner/io_manager.h
31+
${CMAKE_CURRENT_LIST_DIR}/runner/decoder_runner.cpp
32+
${CMAKE_CURRENT_LIST_DIR}/runner/decoder_runner.h
33+
${CMAKE_CURRENT_LIST_DIR}/runner/prompt_processor.cpp
34+
${CMAKE_CURRENT_LIST_DIR}/runner/prompt_processor.h
35+
${CMAKE_CURRENT_LIST_DIR}/runner/token_generator.cpp
36+
${CMAKE_CURRENT_LIST_DIR}/runner/token_generator.h
37+
${CMAKE_CURRENT_LIST_DIR}/runner/imem_alloc.h
38+
${CMAKE_CURRENT_LIST_DIR}/runner/client_mem.h
39+
${CMAKE_CURRENT_LIST_DIR}/runner/rpc_mem.cpp
40+
${CMAKE_CURRENT_LIST_DIR}/runner/rpc_mem.h
41+
${CMAKE_CURRENT_LIST_DIR}/runner/kv_manager.cpp
42+
${CMAKE_CURRENT_LIST_DIR}/runner/kv_manager.h
3343
)
3444

3545
list(
@@ -42,7 +52,7 @@ list(
4252
# build qnn llama runner
4353
add_executable(qnn_llama_runner ${_llama_runner__srcs})
4454
target_include_directories(
45-
qnn_llama_runner PUBLIC ${_common_include_directories}
55+
qnn_llama_runner PUBLIC ${_common_include_directories} ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizers/include
4656
)
4757

4858
target_link_options_shared_lib(quantized_ops_lib)

0 commit comments

Comments
 (0)