
Commit 662a6ac

Merge branch 'main' into gh/lucylq/81/orig
2 parents: e56fafb + a6e2961

File tree: 92 files changed (+4198 / -1775 lines)


.ci/scripts/build_llama_android.sh

Lines changed: 3 additions & 13 deletions
@@ -22,18 +22,12 @@ install_executorch_and_backend_lib() {
   ANDROID_NDK=/opt/ndk
   BUCK2=buck2
   ANDROID_ABI=arm64-v8a
-  cmake -DBUCK2="${BUCK2}" \
+  cmake --preset llm \
+    -DBUCK2="${BUCK2}" \
     -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
     -DANDROID_ABI="${ANDROID_ABI}" \
     -DCMAKE_INSTALL_PREFIX=cmake-android-out \
     -DCMAKE_BUILD_TYPE=Release \
-    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-    -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-    -DEXECUTORCH_BUILD_XNNPACK=ON \
-    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
     -DXNNPACK_ENABLE_ARM_BF16=OFF \
     -Bcmake-android-out .

@@ -51,11 +45,7 @@ build_llama_runner() {
     -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK"/build/cmake/android.toolchain.cmake \
     -DANDROID_ABI="${ANDROID_ABI}" \
     -DCMAKE_INSTALL_PREFIX=cmake-android-out \
-    -DCMAKE_BUILD_TYPE=Release -DPYTHON_EXECUTABLE=python \
-    -DEXECUTORCH_BUILD_XNNPACK=ON \
-    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -DCMAKE_BUILD_TYPE=Release \
     -Bcmake-android-out/examples/models/llama examples/models/llama
 
   cmake --build cmake-android-out/examples/models/llama -j4 --config Release
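Note: the two hunks above fold the long list of per-feature -DEXECUTORCH_BUILD_* flags into the new llm preset. As a rough local equivalent of the configure step the script now performs (a sketch, not the script itself; the NDK path and ABI are only the script's defaults, and the trailing build command is generic CMake rather than anything this hunk shows):

  ANDROID_NDK=/opt/ndk
  ANDROID_ABI=arm64-v8a

  # The llm preset supplies the LLM-related EXECUTORCH_BUILD_* cache variables,
  # so only toolchain/install/build-type settings are passed explicitly.
  cmake --preset llm \
      -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
      -DANDROID_ABI="${ANDROID_ABI}" \
      -DCMAKE_INSTALL_PREFIX=cmake-android-out \
      -DCMAKE_BUILD_TYPE=Release \
      -Bcmake-android-out .
  cmake --build cmake-android-out -j4 --config Release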

.ci/scripts/test_llama.sh

Lines changed: 1 addition & 15 deletions
@@ -152,21 +152,11 @@ which "${PYTHON_EXECUTABLE}"
 cmake_install_executorch_libraries() {
   echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
   rm -rf cmake-out
-  retry cmake \
+  retry cmake --preset llm \
     -DCMAKE_INSTALL_PREFIX=cmake-out \
     -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
-    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-    -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-    -DEXECUTORCH_BUILD_KERNELS_CUSTOM="$CUSTOM" \
-    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-    -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
-    -DEXECUTORCH_BUILD_MPS="$MPS" \
-    -DEXECUTORCH_BUILD_COREML="$COREML" \
     -DEXECUTORCH_BUILD_QNN="$QNN" \
     -DQNN_SDK_ROOT="$QNN_SDK_ROOT" \
-    -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
     -Bcmake-out .
   cmake --build cmake-out -j9 --target install --config "$CMAKE_BUILD_TYPE"
 }

@@ -181,10 +171,6 @@ cmake_build_llama_runner() {
   retry cmake \
     -DCMAKE_INSTALL_PREFIX=cmake-out \
     -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
-    -DEXECUTORCH_BUILD_KERNELS_CUSTOM="$CUSTOM" \
-    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-    -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
-    -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
     -Bcmake-out/${dir} \
     ${dir}
   cmake --build cmake-out/${dir} -j9 --config "$CMAKE_BUILD_TYPE"

.github/workflows/build-presets.yml

Lines changed: 4 additions & 4 deletions
@@ -20,7 +20,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        preset: [macos-arm64, pybind]
+        preset: [macos-arm64, pybind, llm]
     with:
       job-name: build
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}

@@ -32,14 +32,14 @@
         set -eux
         ${CONDA_RUN} ./install_requirements.sh > /dev/null
         ${CONDA_RUN} cmake --preset ${{ matrix.preset }}
-        ${CONDA_RUN} cmake --build cmake-out --parallel
+        ${CONDA_RUN} cmake --build cmake-out -j$(( $(sysctl -n hw.ncpu) - 1 ))
 
   linux:
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     strategy:
       fail-fast: false
       matrix:
-        preset: [pybind]
+        preset: [pybind, llm]
       runner: [linux.2xlarge, linux.arm64.2xlarge]
       docker-image: [executorch-ubuntu-22.04-clang12, executorch-ubuntu-22.04-gcc11-aarch64]
       # Excluding specific runner + docker image combinations that don't make sense:

@@ -65,4 +65,4 @@
 
       ./install_requirements.sh > /dev/null
       cmake --preset ${{ matrix.preset }}
-      cmake --build cmake-out --parallel
+      cmake --build cmake-out -j$(( $(nproc) - 1 ))
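Note: cmake --build --parallel with no value defers to the native build tool's default job count, while the replacement computes an explicit count and holds one core back for the runner itself. The same arithmetic spelled out (Linux form; the macOS job above uses sysctl -n hw.ncpu instead of nproc):

  # $(nproc) prints the number of available cores; the shell arithmetic
  # $(( ... )) subtracts one so the CI runner keeps a core for itself.
  cmake --build cmake-out -j$(( $(nproc) - 1 ))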

.github/workflows/trunk.yml

Lines changed: 26 additions & 0 deletions
@@ -692,3 +692,29 @@ jobs:
     build-mode: Release
     build-tool: cmake
     docker-image: executorch-ubuntu-22.04-clang12
+
+  unittest-nxp-neutron:
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-clang12
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        set -eux
+
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        # Build and install Executorch
+        PYTHON_EXECUTABLE=python \
+        CMAKE_ARGS="-DEXECUTORCH_BUILD_NXP_NEUTRON=ON" \
+        .ci/scripts/setup-linux.sh --build-tool "cmake"
+
+        # Run pytest
+        PYTHON_EXECUTABLE=python bash backends/nxp/run_unittests.sh
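Note: the job's script block boils down to two commands, so a local reproduction might look like the following (a sketch; it assumes a repo checkout with a working Python environment already active, which the CI job gets from its conda setup):

  # Build and install ExecuTorch with the NXP Neutron backend enabled,
  # exactly as the job's setup step does.
  PYTHON_EXECUTABLE=python \
  CMAKE_ARGS="-DEXECUTORCH_BUILD_NXP_NEUTRON=ON" \
      .ci/scripts/setup-linux.sh --build-tool "cmake"

  # Then run the backend's unit tests.
  PYTHON_EXECUTABLE=python bash backends/nxp/run_unittests.sh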

CMakePresets.json

Lines changed: 20 additions & 0 deletions
@@ -36,6 +36,26 @@
         "string": "${hostSystemName}",
         "list": ["Darwin", "Linux", "Windows"]
       }
+    },
+    {
+      "name": "llm",
+      "displayName": "Build LLM libraries",
+      "inherits": [
+        "common"
+      ],
+      "cacheVariables": {
+        "EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/llm.cmake",
+        "CMAKE_OSX_DEPLOYMENT_TARGET": "10.15"
+      },
+      "condition": {
+        "type": "inList",
+        "string": "${hostSystemName}",
+        "list": [
+          "Darwin",
+          "Linux",
+          "Windows"
+        ]
+      }
     }
   ]
 }
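Note: with this preset registered, an LLM-flavored build reduces to the two commands the workflows above already run (a sketch; cmake-out is the binary directory the presets configure, and the -j arithmetic mirrors the CI change):

  # Configure: the preset points EXECUTORCH_BUILD_PRESET_FILE at
  # tools/cmake/preset/llm.cmake and pins CMAKE_OSX_DEPLOYMENT_TARGET to 10.15.
  cmake --preset llm

  # Build, holding one core back as the workflows now do.
  cmake --build cmake-out -j$(( $(nproc) - 1 ))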

backends/apple/coreml/CMakeLists.txt

Lines changed: 0 additions & 3 deletions
@@ -25,8 +25,6 @@ endif()
 
 option(COREML_BUILD_EXECUTOR_RUNNER "Build CoreML executor runner." OFF)
 
-set(CMAKE_OSX_DEPLOYMENT_TARGET 10.15)
-
 # inmemoryfs sources
 set(INMEMORYFS_SOURCES
   runtime/inmemoryfs/inmemory_filesystem.cpp

@@ -240,7 +238,6 @@ if(EXECUTORCH_BUILD_COREML AND EXECUTORCH_BUILD_PYBIND)
 
   pybind11_add_module(executorchcoreml SHARED runtime/inmemoryfs/inmemory_filesystem_py.cpp)
 
-  target_compile_options(executorchcoreml PRIVATE -mmacosx-version-min=${CMAKE_OSX_DEPLOYMENT_TARGET})
   if(CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo")
     target_compile_options(executorchcoreml PRIVATE -g)
   endif()

backends/arm/scripts/install_reference_model.sh

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@ tosa_reference_model_url="https://git.gitlab.arm.com/tosa/tosa-reference-model.g
 tosa_reference_model_0_80_branch="v0.80"
 tosa_reference_model_0_80_rev="70ed0b40fa831387e36abdb4f7fb9670a3464f5a"
 tosa_serialization_lib_0_80_rev="v0.80.1"
-tosa_reference_model_1_0_rev="4d17b5b960cd986d8cb8052188fbe3ae494789e8"
+tosa_reference_model_1_0_rev="d102f426dd2e3c1f25bbf23292ec8ee51aa9c677"
 
 script_dir=$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)

backends/cadence/aot/tests/test_fusion_ops_passes.py

Lines changed: 109 additions & 63 deletions
@@ -13,10 +13,6 @@
 import executorch.backends.cadence.aot.ops_registrations  # noqa
 import torch
 from executorch.backends.cadence.aot import compiler
-from executorch.backends.cadence.aot.compiler import (
-    export_to_edge,
-    quantize_and_export_to_edge,
-)
 from executorch.backends.cadence.aot.fuse_ops import (
     FuseFullThenReshapePass,
     FuseMulScalarIntoDequantPass,

@@ -336,94 +332,144 @@ def test_replace_quant_view_dequant_with_requantize(self):
         )
 
     def test_replace_dequant_quant_with_requantize(self):
-        class M(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
-            def forward(self, x):
-                x = torch.ops.quantized_decomposed.dequantize_per_tensor(
-                    x, 1.2, 3, 0, 127, torch.int8
-                )
-                x = torch.permute(x, [2, 0, 1, 3])
-                x = torch.ops.quantized_decomposed.quantize_per_tensor(
-                    x, 4.5, 6, 0, 127, torch.int8
-                )
-                return x
-
-        inputs = torch.randn(2, 12, 1, 6).to(torch.int8)
-        model = M()
-        graph_module = export_to_edge(model, (inputs,)).exported_program().graph_module
-        graph_module = FuseQuantDequantToRequantizePass()(graph_module).graph_module
+        builder = GraphBuilder()
+        x = builder.placeholder("x", torch.randn(2, 12, 1, 6, dtype=torch.float32))
+        dequant = builder.call_operator(
+            op=exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
+            args=(x, 1.2, 3, 0, 127, torch.int8),
+        )
+        quant = builder.call_operator(
+            op=exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
+            args=(dequant, 4.5, 6, 0, 127, torch.int8),
+        )
+        builder.output(quant)
+        graph_module = FuseQuantDequantToRequantizePass()(
+            builder.get_graph_module()
+        ).graph_module
 
         self.check_op_counts(
             graph_module,
             expected_op_counts={
-                # Verify that dequant -> permute -> quant was replaced with permute -> requantize.
+                # Verify that dequant -> quant was replaced with requantize.
                 exir_ops.edge.quantized_decomposed.quantize_per_tensor.default: 0,
                 exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default: 0,
                 exir_ops.edge.cadence.requantize.default: 1,
             },
         )
 
     def test_replace_dequant_permute_quant_with_requantize(self):
-        class M(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-
-            def forward(self, x):
-                x = torch.ops.quantized_decomposed.dequantize_per_tensor(
-                    x, 1.2, 3, 0, 127, torch.int8
-                )
-                x = torch.permute(x, [2, 0, 1, 3])
-                x = torch.ops.quantized_decomposed.quantize_per_tensor(
-                    x, 4.5, 6, 0, 127, torch.int8
-                )
-                return x
-
-        inputs = torch.randn(2, 12, 1, 6).to(torch.int8)
-        model = M()
-        graph_module = export_to_edge(model, (inputs,)).exported_program().graph_module
-        graph_module = FuseQuantDequantToRequantizePass()(graph_module).graph_module
+        builder = GraphBuilder()
+        x = builder.placeholder("x", torch.randn(2, 12, 1, 6, dtype=torch.float32))
+        dequant = builder.call_operator(
+            op=exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
+            args=(x, 1.2, 3, 0, 127, torch.int8),
+        )
+        permute = builder.call_operator(
+            op=exir_ops.edge.aten.permute_copy.default, args=(dequant, [2, 0, 1, 3])
+        )
+        quant = builder.call_operator(
+            op=exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
+            args=(permute, 4.5, 6, 0, 127, torch.int8),
+        )
+        builder.output(quant)
+        graph_module = FuseQuantDequantToRequantizePass()(
+            builder.get_graph_module()
+        ).graph_module
 
         self.check_op_counts(
             graph_module,
             expected_op_counts={
                 # Verify that dequant -> permute -> quant was replaced with permute -> requantize.
                 exir_ops.edge.quantized_decomposed.quantize_per_tensor.default: 0,
                 exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default: 0,
+                exir_ops.edge.aten.permute_copy.default: 1,
                 exir_ops.edge.cadence.requantize.default: 1,
             },
         )
 
     def test_remove_nop_dequant_quant(self):
-        class M(torch.nn.Module):
-            def __init__(self):
-                super(M, self).__init__()
-                self.lin1 = torch.nn.Linear(6, 12, bias=False)
-                self.lin2 = torch.nn.Linear(12, 24, bias=False)
+        LEADING_DIMS: Final[int] = 12
+        IN_DIM: Final[int] = 6
+        OUT_DIM: Final[int] = 12
 
-            def forward(self, x):
-                x = self.lin1(x)
-                # redundant dequant+quant will be created around this permute
-                x = torch.permute(x, [0, 2, 1, 3])
-                x = self.lin2(x)
-                return x
-
-        inputs = torch.randn(2, 12, 1, 6)
-        model = M()
-        graph_module = (
-            quantize_and_export_to_edge(model, (inputs,))
-            .exported_program()
-            .graph_module
+        builder = GraphBuilder()
+        x = builder.placeholder(
+            "x", torch.randn(LEADING_DIMS, IN_DIM, dtype=torch.float32)
+        )
+        quant1 = builder.call_operator(
+            op=exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
+            args=(x, 4.5, 6, 0, 127, torch.int8),
+        )
+        weights = builder.call_operator(
+            op=exir_ops.edge.aten.full.default, args=([OUT_DIM, IN_DIM], 1)
+        )
+        bias = builder.call_operator(
+            op=exir_ops.edge.aten.full.default, args=([OUT_DIM], 1)
+        )
+        weight_zero_point = builder.call_operator(
+            op=exir_ops.edge.aten.full.default, args=([IN_DIM], 0)
+        )
+        out_multiplier = builder.call_operator(
+            op=exir_ops.edge.aten.full.default, args=([OUT_DIM], 1)
+        )
+        out_shift = builder.call_operator(
+            op=exir_ops.edge.aten.full.default, args=([OUT_DIM], 0)
         )
-        graph_module = FuseQuantDequantToRequantizePass()(graph_module).graph_module
+        linear1 = builder.call_operator(
+            op=exir_ops.edge.cadence.quantized_linear.default,
+            args=(
+                quant1,
+                weights,
+                bias,
+                0,  # src_zero_point
+                weight_zero_point,
+                out_multiplier,
+                out_shift,
+                0,  # out_zero_point
+                None,
+            ),
+        )
+        dequant1 = builder.call_operator(
+            op=exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
+            args=(linear1, 1.2, 3, 0, 127, torch.int8),
+        )
+        permute = builder.call_operator(
+            op=exir_ops.edge.aten.permute_copy.default, args=(dequant1, [1, 0])
+        )
+        quant2 = builder.call_operator(
+            op=exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
+            args=(permute, 4.5, 6, 0, 127, torch.int8),
+        )
+        linear2 = builder.call_operator(
+            op=exir_ops.edge.cadence.quantized_linear.default,
+            args=(
+                quant2,
+                weights,
+                bias,
+                0,  # src_zero_point
+                weight_zero_point,
+                out_multiplier,
+                out_shift,
+                0,  # out_zero_point
+                None,
+            ),
+        )
+        dequant2 = builder.call_operator(
+            op=exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
+            args=(linear2, 1.2, 3, 0, 127, torch.int8),
+        )
+        builder.output(dequant2)
+        graph_module = FuseQuantDequantToRequantizePass()(
+            builder.get_graph_module()
+        ).graph_module
         self.check_op_counts(
             graph_module,
             expected_op_counts={
-                # Verify that one dequant/quant pair was removed
-                # Expect 1 quantize ops: 1 input
+                # Verify that one dequant/quant pair was removed from chain:
+                # quant->linear->dequant->permute->quant->linear->dequant
+                # gets converted to:
+                # quant->linear->permute->linear->dequant
                 exir_ops.edge.quantized_decomposed.quantize_per_tensor.default: 1,
-                # Expect 1 dequant op at the end (output of second linear)
                 exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default: 1,
             },
        )
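Note: to exercise just the rewritten tests, a pytest invocation along these lines should work (a sketch; it assumes pytest is installed, and the -k expression merely name-matches the requantize/nop-pair tests above):

  # Run only the fusion tests touched by this commit.
  python -m pytest backends/cadence/aot/tests/test_fusion_ops_passes.py \
      -k "requantize or nop_dequant_quant"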
