Commit 1e1a28c

Merge branch 'main' into export-D70507395
2 parents: 56e8ead + 73740e9

96 files changed: +3151 −1557 lines

.ci/docker/ci_commit_pins/pytorch.txt

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-27e35de6c288bffad1b4d18b393579c1d1a95547
+08434df1f2f88c9770e59246caa2ff9c6f613270

.ci/scripts/test_model.sh

Lines changed: 8 additions & 0 deletions
@@ -100,6 +100,14 @@ test_model() {
     rm "./${MODEL_NAME}.pte"
     return # Skip running with portable executor runnner since portable doesn't support Qwen's biased linears.
   fi
+  if [[ "${MODEL_NAME}" == "phi4_mini" ]]; then
+    # Install requirements for export_llama
+    bash examples/models/llama/install_requirements.sh
+    # Test export_llama script: python3 -m examples.models.llama.export_llama.
+    "${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -c examples/models/llama/params/demo_rand_params.pth -p examples/models/phi-4-mini/config.json
+    run_portable_executor_runner
+    rm "./${MODEL_NAME}.pte"
+  fi

   # Export a basic .pte and run the model.
   "${PYTHON_EXECUTABLE}" -m examples.portable.scripts.export --model_name="${MODEL_NAME}" "${STRICT}"

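For reference, the new phi4_mini CI step can be reproduced outside of test_model.sh with the same commands (a sketch, assuming the executorch repository root as the working directory and python3 on the PATH; run_portable_executor_runner is a helper defined inside the CI script):

# Install export_llama dependencies, then export phi4_mini with the demo parameters used in CI.
bash examples/models/llama/install_requirements.sh
python3 -m examples.models.llama.export_llama \
  --model phi4_mini \
  -c examples/models/llama/params/demo_rand_params.pth \
  -p examples/models/phi-4-mini/config.json
# CI then runs the generated phi4_mini.pte with the portable executor_runner and deletes it.
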
.github/workflows/trunk.yml

Lines changed: 1 addition & 1 deletion
@@ -229,7 +229,7 @@ jobs:
       # see if we can import the module successfully
       ${CONDA_RUN} python -c "from executorch.extension.pybindings import portable_lib; print('success!')"

-  test-static-llama-ane:
+  test-static-llama-ane:
     name: test-static-llama-ane
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     with:

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -749,9 +749,9 @@ endif()

 if(EXECUTORCH_BUILD_PTHREADPOOL
    AND EXECUTORCH_BUILD_CPUINFO
-   AND CMAKE_CXX_STANDARD GREATER_EQUAL 14
 )
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/threadpool)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/parallel)
 endif()

 if(EXECUTORCH_BUILD_PYBIND)

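With the CMAKE_CXX_STANDARD guard removed, extension/threadpool and the new extension/parallel subdirectory are added whenever the pthreadpool and cpuinfo options are enabled. A minimal configure sketch under that assumption (only the two relevant options are shown; all other ExecuTorch options keep their defaults):

# Configure with the threadpool prerequisites enabled; both extension/threadpool and
# extension/parallel are then part of the build.
cmake -S . -B cmake-out \
  -DEXECUTORCH_BUILD_PTHREADPOOL=ON \
  -DEXECUTORCH_BUILD_CPUINFO=ON
cmake --build cmake-out
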
backends/arm/scripts/build_executorch_runner.sh

Lines changed: 20 additions & 5 deletions
@@ -14,8 +14,9 @@ toolchain_cmake=${et_root_dir}/examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmak
 pte_file=""
 target="ethos-u55-128"
 build_type="Release"
-system_config=""
 bundleio=false
+system_config=""
+memory_mode=""
 build_with_etdump=false
 extra_build_flags=""
 output_folder_set=false
@@ -32,9 +33,12 @@ help() {
     echo "  --pte=<PTE_FILE>            pte file (genrated by the aot_arm_compier from the model to include in the elf"
     echo "  --target=<TARGET>           Target to build and run for Default: ${target}"
     echo "  --build_type=<TYPE>         Build with Release, Debug or RelWithDebInfo, default is ${build_type}"
-    echo "  --system_config=<CONFIG>    System configuration to select from the Vela configuration file (see vela.ini). Default: Ethos_U55_High_End_Embedded for EthosU55 targets, Ethos_U85_SYS_DRAM_Mid for EthosU85 targets."
-    echo "                              NOTE: If given, this option must match the given target. This option also sets timing adapter values customized for specific hardware, see ./executor_runner/CMakeLists.txt."
     echo "  --bundleio                  Support both pte and Bundle IO bpte using Devtools BundelIO with Input/RefOutput included"
+    echo "  --system_config=<CONFIG>    System configuration to select from the Vela configuration file (see vela.ini). Default: Ethos_U55_High_End_Embedded for EthosU55 targets, Ethos_U85_SYS_DRAM_Mid for EthosU85 targets."
+    echo "                              NOTE: If given, this option must match the given target. This option along with the memory_mode sets timing adapter values customized for specific hardware, see ./executor_runner/CMakeLists.txt."
+    echo "  --memory_mode=<CONFIG>      Vela memory mode, used for setting the Timing Adapter parameters of the Corstone platforms."
+    echo "                              Valid values are Shared_Sram(for Ethos-U55, Ethos-U65, Ethos-85), Sram_Only(for Ethos-U55, Ethos-U65, Ethos-U85) or Dedicated_Sram(for Ethos-U65, Ethos-U85)."
+    echo "                              Default: Shared_Sram for the Ethos-U55 and Sram_Only for the Ethos-U85"
     echo "  --etdump                    Adds Devtools etdump support to track timing, etdump area will be base64 encoded in the log"
     echo "  --extra_build_flags=<FLAGS> Extra flags to pass to cmake like -DET_ARM_BAREMETAL_METHOD_ALLOCATOR_POOL_SIZE=60000 Default: none "
     echo "  --output=<FOLDER>           Output folder Default: <MODEL>/<MODEL>_<TARGET INFO>.pte"
@@ -49,8 +53,9 @@ for arg in "$@"; do
         --pte=*) pte_file="${arg#*=}";;
         --target=*) target="${arg#*=}";;
         --build_type=*) build_type="${arg#*=}";;
-        --system_config=*) system_config="${arg#*=}";;
         --bundleio) bundleio=true ;;
+        --system_config=*) system_config="${arg#*=}";;
+        --memory_mode=*) memory_mode="${arg#*=}";;
         --etdump) build_with_etdump=true ;;
         --extra_build_flags=*) extra_build_flags="${arg#*=}";;
         --output=*) output_folder="${arg#*=}" ; output_folder_set=true ;;
@@ -83,6 +88,15 @@ then
     fi
 fi

+if [[ ${memory_mode} == "" ]]
+then
+    memory_mode="Shared_Sram"
+    if [[ ${target} =~ "ethos-u85" ]]
+    then
+        memory_mode="Sram_Only"
+    fi
+fi
+
 output_folder=$(realpath ${output_folder})

 if [[ ${target} == *"ethos-u55"* ]]; then
@@ -91,7 +105,7 @@ else
     target_cpu=cortex-m85
 fi
 echo "--------------------------------------------------------------------------------"
-echo "Build Arm Baremetal executor_runner for ${target} with ${pte_file} using ${system_config} ${extra_build_flags} to '${output_folder}/cmake-out'"
+echo "Build Arm Baremetal executor_runner for ${target} with ${pte_file} using ${system_config} ${memory_mode} ${extra_build_flags} to '${output_folder}/cmake-out'"
 echo "--------------------------------------------------------------------------------"

 cd ${et_root_dir}/examples/arm/executor_runner
@@ -120,6 +134,7 @@ cmake \
     ${build_with_etdump_flags} \
     -DPYTHON_EXECUTABLE=$(which python3) \
     -DSYSTEM_CONFIG=${system_config} \
+    -DMEMORY_MODE=${memory_mode} \
     ${extra_build_flags} \
     -B ${output_folder}/cmake-out

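A hypothetical invocation showing the new --memory_mode flag alongside the existing options (the pte path and target are placeholders; valid memory modes are those listed in the help text above):

# Build the bare-metal executor_runner for an Ethos-U85 target with an explicit memory mode.
backends/arm/scripts/build_executorch_runner.sh \
  --pte=model_ethos-u85-128.pte \
  --target=ethos-u85-128 \
  --system_config=Ethos_U85_SYS_DRAM_Mid \
  --memory_mode=Dedicated_Sram
# When --memory_mode is omitted, the script now defaults to Shared_Sram,
# or to Sram_Only when the target matches ethos-u85.
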
backends/cadence/hifi/operators/op_quantized_conv_out.cpp

Lines changed: 3 additions & 2 deletions
@@ -1,7 +1,7 @@
 // (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.

 #include <executorch/backends/cadence/hifi/kernels/kernels.h>
-#include <executorch/backends/cadence/reference/operators/operators.h>
+#include <executorch/backends/cadence/hifi/operators/operators.h>
 #include <executorch/runtime/kernel/kernel_includes.h>

 #define ALIGN_PTR(x, bytes) ((((unsigned)(x)) + (bytes - 1)) & (~(bytes - 1)))
@@ -1108,4 +1108,5 @@ void quantized_conv_per_tensor_out(
 } // namespace native
 } // namespace HiFi
 } // namespace impl
-} // namespace cadence
+} // namespace cadence
+} // namespace cadence

backends/qualcomm/CMakeLists.txt

Lines changed: 4 additions & 11 deletions
@@ -126,7 +126,6 @@ add_library(qnn_executorch_logging STATIC)
 add_library(qnn_factory STATIC)
 add_library(qnn_function_interface INTERFACE)
 add_library(qnn_graph STATIC)
-add_library(qnn_header INTERFACE)
 add_library(qnn_implementation STATIC)
 add_library(qnn_logger STATIC)
 add_library(qnn_manager STATIC)
@@ -143,16 +142,12 @@ add_library(utils STATIC)
 # declare dependency
 #
 target_link_libraries(qcir_utils PRIVATE qcir)
-target_link_libraries(wrappers PRIVATE qnn_header qnn_executorch_logging)
-target_link_libraries(qnn_function_interface INTERFACE qnn_header)
+target_link_libraries(wrappers PRIVATE qnn_executorch_logging)
 target_link_libraries(
-  qnn_implementation PRIVATE qnn_function_interface qnn_header
-  qnn_executorch_logging ${CMAKE_DL_LIBS}
+  qnn_implementation PRIVATE qnn_function_interface qnn_executorch_logging ${CMAKE_DL_LIBS}
 )
-target_link_libraries(qnn_sys_function_interface INTERFACE qnn_header)
 target_link_libraries(
-  qnn_sys_implementation PRIVATE qnn_sys_function_interface qnn_header
-  qnn_executorch_logging ${CMAKE_DL_LIBS}
+  qnn_sys_implementation PRIVATE qnn_sys_function_interface qnn_executorch_logging ${CMAKE_DL_LIBS}
 )
 target_link_libraries(qnn_executorch_logging PRIVATE qnn_schema)
 target_link_libraries(qnn_profiler PRIVATE qnn_executorch_logging)
@@ -178,9 +173,7 @@ target_link_libraries(
 )

 target_link_libraries(
-  qnn_factory
-  PUBLIC qnn_header
-  PRIVATE qnn_schema qnn_backend qnn_device qnn_context qnn_graph
+  qnn_factory PRIVATE qnn_schema qnn_backend qnn_device qnn_context qnn_graph
   qnn_mem_manager qnn_custom_protocol
 )
 target_link_libraries(

backends/qualcomm/runtime/backends/CMakeLists.txt

Lines changed: 0 additions & 51 deletions
@@ -122,54 +122,3 @@ target_sources(
   PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnCustomProtocol.h
   PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnCustomProtocol.cpp
 )
-
-set(qnn_header_basenames
-    QnnBackend.h
-    QnnCommon.h
-    QnnContext.h
-    QnnDevice.h
-    GPU/QnnGpuBackend.h
-    GPU/QnnGpuCommon.h
-    GPU/QnnGpuContext.h
-    GPU/QnnGpuGraph.h
-    QnnGraph.h
-    HTP/QnnHtpCommon.h
-    HTP/QnnHtpDevice.h
-    HTP/QnnHtpGraph.h
-    HTP/QnnHtpMem.h
-    HTP/QnnHtpPerfInfrastructure.h
-    HTP/QnnHtpProfile.h
-    HTP/QnnHtpProperty.h
-    HTP/QnnHtpSystemContext.h
-    QnnInterface.h
-    QnnLog.h
-    QnnMem.h
-    QnnOpDef.h
-    QnnOpPackage.h
-    QnnProfile.h
-    QnnProperty.h
-    Saver/QnnSaver.h
-    Saver/QnnSaverCommon.h
-    QnnSdkBuildId.h
-    QnnSignal.h
-    QnnTensor.h
-    QnnTypes.h
-    System/QnnSystemCommon.h
-    System/QnnSystemContext.h
-    System/QnnSystemInterface.h
-)
-
-set(QNN_HEADER_DIR_DST ${CMAKE_CURRENT_BINARY_DIR}/QNN/include)
-
-# add the custom commands to copy each headers
-foreach(_qnn_header ${qnn_header_basenames})
-  # copy at generation time to make below target_sources(qnn_header) happy.
-  configure_file(
-    ${QNN_SDK_ROOT}/include/QNN/${_qnn_header}
-    ${QNN_HEADER_DIR_DST}/${_qnn_header} COPYONLY
-  )
-  list(APPEND qnn_header_files ${QNN_HEADER_DIR_DST}/${_qnn_header})
-endforeach()
-
-# qnn_header
-target_sources(qnn_header INTERFACE ${qnn_header_files})

backends/vulkan/targets.bzl

Lines changed: 7 additions & 1 deletion
@@ -5,7 +5,13 @@ load("@fbsource//tools/build_defs:platform_defs.bzl", "ANDROID", "CXX", "FBCODE"


 def get_vulkan_compiler_flags():
-    return ["-Wno-missing-prototypes", "-Wno-global-constructors"]
+    return select({
+        "DEFAULT": [
+            "-Wno-global-constructors",
+            "-Wno-missing-prototypes",
+        ],
+        "ovr_config//os:windows": [],
+    })

 def get_labels(no_volk):
     if no_volk:

backends/xnnpack/partition/config/gemm_configs.py

Lines changed: 25 additions & 13 deletions
@@ -96,9 +96,9 @@ def _detect_precision(self, node: torch.fx.Node) -> ConfigPrecisionType:
     def _overwrite_precision(self, node: torch.fx.Node):
         precision = self._detect_precision(node)
         if precision not in self.enabled_precision_types:
-            # detected precision is not enabled, lets try to partition it as fp32
+            # detected precision is not enabled, try to partition it as fp32
             if self.enabled_precision_types == [ConfigPrecisionType.FP32]:
-                # if only fp32 is enabled, then we can still partition fp32 gemms
+                # when only fp32 is enabled, then we can still partition fp32 gemms
                 # even with in a quantized graph
                 if precision in [
                     ConfigPrecisionType.STATIC_QUANT,
@@ -107,6 +107,7 @@ def _overwrite_precision(self, node: torch.fx.Node):
                     precision = ConfigPrecisionType.FP32
                     logging.info(f"Overwriting precision, partitioning {node} as FP32")
                     return True, precision
+
         return False, precision

     def get_deps(
@@ -226,8 +227,11 @@ def _get_bias_deps(
         self, node: torch.fx.Node, ep: ExportedProgram, precision: ConfigPrecisionType
     ) -> Tuple[bool, List[torch.fx.Node]]:
         gemm_deps = []
-        if precision == ConfigPrecisionType.FP32 and self.force_fp32_dynamic_linear:
-            # if force force_fp32_dynamic_linear is enabled, then we
+        if (
+            precision == ConfigPrecisionType.FP32
+            and self.force_non_static_weights_for_f32_linear
+        ):
+            # if force_non_static_weights_for_f32_linear is enabled, then we
             # do not partition the weight node
             return (True, gemm_deps)

@@ -305,8 +309,11 @@ def get_original_aten(self) -> Optional[torch._ops.OpOverload]:
     def _get_weight_deps(
         self, node: torch.fx.Node, ep: ExportedProgram, precision: ConfigPrecisionType
     ) -> Tuple[bool, List[torch.fx.Node]]:
-        if precision == ConfigPrecisionType.FP32 and self.force_fp32_dynamic_linear:
-            # if force fp32_dynamic_linear is enabled, then we
+        if (
+            precision == ConfigPrecisionType.FP32
+            and self.force_non_static_weights_for_f32_linear
+        ):
+            # if force_non_static_weights_for_f32_linear is enabled, then we
             # do not partition the weight node
             return (True, [])

@@ -412,9 +419,11 @@ def __init__(self, **kwargs):
     def _get_weight_deps(
         self, node: torch.fx.Node, ep: ExportedProgram, precision: ConfigPrecisionType
     ) -> Tuple[bool, List[torch.fx.Node]]:
-        # TODO(maxren, T210537195):
-        if precision == ConfigPrecisionType.FP32 and self.force_fp32_dynamic_linear:
-            # if force fp32_dynamic_linear is on and we detected this as fp32, then we
+        if (
+            precision == ConfigPrecisionType.FP32
+            and self.force_non_static_weights_for_f32_linear
+        ):
+            # if force_non_static_weights_for_f32_linear is on and we detected this as fp32, then we
            # do not partition the weight node
            return (True, [])

@@ -501,11 +510,11 @@ def find_partition_args(input_node):
         node.args = old_args
         node.users = old_users

-        # When using force_fp32_dynamic_linear, we want to get_deps to overwrite the source partition nodes.
+        # When using force_non_static_weights_for_f32_linear, we want to get_deps to overwrite the source partition nodes.
         # Else we want to be greedy.
         ret_deps = (
             list(set(deps) & set(src_partition.nodes))
-            if self.force_fp32_dynamic_linear
+            if self.force_non_static_weights_for_f32_linear
             else list(set(deps) | set(src_partition.nodes))
         )

@@ -531,8 +540,11 @@ def __init__(self, **kwargs):
     def _get_weight_deps(
         self, node: torch.fx.Node, ep: ExportedProgram, precision: ConfigPrecisionType
     ) -> Tuple[bool, List[torch.fx.Node]]:
-        if precision == ConfigPrecisionType.FP32 and self.force_fp32_dynamic_linear:
-            # if force fp32_dynamic_linear is on and we detected this as fp32, then we
+        if (
+            precision == ConfigPrecisionType.FP32
+            and self.force_non_static_weights_for_f32_linear
+        ):
+            # if force_non_static_weights_for_f32_linear is on and we detected this as fp32, then we
             # do not partition the weight node
             return (True, [])

backends/xnnpack/partition/config/xnnpack_config.py

Lines changed: 3 additions & 1 deletion
@@ -41,7 +41,9 @@ def __init__(self, **kwargs):
         super().__init__()
         self.enabled_precision_types = self.supported_precision_types()
         # Flag used in GEMMConfig()
-        self.force_fp32_dynamic_linear = kwargs.get("force_fp32_dynamic_linear", False)
+        self.force_non_static_weights_for_f32_linear = kwargs.get(
+            "force_non_static_weights_for_f32_linear", False
+        )

     def get_partition(
         self, node: torch.fx.Node, ep: ExportedProgram

backends/xnnpack/test/ops/test_linear.py

Lines changed: 2 additions & 2 deletions
@@ -948,7 +948,7 @@ def test_linear_qd8_as_fp32(self):
             },
         )

-    def test_linear_fp32_with_force_as_mm(self):
+    def test_linear_with_force_non_static_weights_for_f32_linear(self):
         def check_signature(
             signature: ExportGraphSignature,
             force_flag: bool,
@@ -981,7 +981,7 @@ def check_signature(
             inputs = module.get_inputs()
             tester = Tester(module, inputs).export()
             partitioner = XnnpackPartitioner(
-                force_fp32_dynamic_linear=force_flag
+                force_non_static_weights_for_f32_linear=force_flag
             )
             if legacy_mode:
                 tester.to_edge()

backends/xnnpack/test/ops/test_lstm.py

Lines changed: 5 additions & 3 deletions
@@ -43,18 +43,20 @@ def test_fp32_lstm(self):
             .run_method_and_compare_outputs()
         )

-    def test_fp32_lstm_force_dynamic_linear(self):
+    def test_lstm_with_force_non_static_weights_for_f32_linear(self):
         (
             Tester(self.LSTMLinear(32, 32, 10), (torch.rand(1, 32, 32),))
             .export()
             .to_edge_transform_and_lower(
                 ToEdgeTransformAndLower(
-                    partitioners=[XnnpackPartitioner(force_fp32_dynamic_linear=True)]
+                    partitioners=[
+                        XnnpackPartitioner(force_non_static_weights_for_f32_linear=True)
+                    ]
                 )
             )
             .check_not(["executorch_exir_dialects_edge__ops_aten_addmm_default"])
             # Weights are supplied as input to linears
-            # Biases are not owned by delegates when force_fp32_dynamic_linear is set
+            # Biases are not owned by delegates when force_non_static_weights_for_f32_linear is set
             .check(["p_lstm_weight_hh_l0", "p_lstm_weight_ih_l0", "p_lstm_bias"])
             .to_executorch()
             .serialize()

build/build_android_library.sh

Lines changed: 3 additions & 1 deletion
@@ -178,7 +178,9 @@ collect_artifacts_to_be_uploaded() {
 }

 main() {
-  BUILD_AAR_DIR="$(mktemp -d)"
+  if [[ -z "${BUILD_AAR_DIR:-}" ]]; then
+    BUILD_AAR_DIR="$(mktemp -d)"
+  fi
   export BUILD_AAR_DIR
   if [ -z "$ANDROID_ABIS" ]; then
     ANDROID_ABIS=("arm64-v8a" "x86_64")

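With this change the AAR output directory can be supplied from the environment instead of always using a fresh temporary directory; a usage sketch (the output path is a placeholder):

# Reuse a fixed output directory across builds; without BUILD_AAR_DIR set,
# the script still falls back to "$(mktemp -d)".
export BUILD_AAR_DIR="$HOME/executorch-aar"
bash build/build_android_library.sh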