Commit d7aa520

Merge branch 'main' into add-module-execute-input-count-check
2 parents: a7941df + 1b593ad

872 files changed: +51330 / -11126 lines


.buckconfig

Lines changed: 1 addition & 0 deletions
@@ -39,6 +39,7 @@
 
 [buck2]
 restarter=true
+file_watcher=notify
 
 [oss]
 folly_cxx_tests = False

.ci/docker/ci_commit_pins/buck2.txt

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-2024-12-16
+2025-05-06

.ci/scripts/build-qnn-sdk.sh

Lines changed: 1 addition & 2 deletions
@@ -39,8 +39,7 @@ set_up_aot() {
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
     -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
-    -DPYTHON_EXECUTABLE=python3 \
-    -DEXECUTORCH_SEPARATE_FLATCC_HOST_PROJECT=OFF
+    -DPYTHON_EXECUTABLE=python3
   cmake --build $PWD --target "PyQnnManagerAdaptor" "PyQnnWrapperAdaptor" -j$(nproc)
   # install Python APIs to correct import path
   # The filename might vary depending on your Python and host version.

.ci/scripts/test_llava.sh

Lines changed: 9 additions & 18 deletions
@@ -93,8 +93,7 @@ cmake_build_llava_runner_for_android() {
     -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
     -DANDROID_ABI=arm64-v8a \
     ${LLAVA_COMMON_CMAKE_ARGS} \
-    -DCMAKE_PREFIX_PATH="$python_lib" \
-    -DLLAVA_RUNNER_NO_TORCH_DUMMY_IMAGE=ON \
+    -DCMAKE_PREFIX_PATH="$python_lib" \
     -B${BUILD_DIR}/${dir} \
     ${dir}
 
@@ -107,11 +106,10 @@ export_llava() {
   $PYTHON_EXECUTABLE -m executorch.examples.models.llava.export_llava --pte-name llava.pte --with-artifacts
 }
 
-# Download a new image with different size, to test if the model can handle different image sizes
-prepare_image_tensor() {
+# Download a new image
+download_image() {
   echo "Downloading image"
   curl -o basketball.jpg https://upload.wikimedia.org/wikipedia/commons/7/73/Chicago_Bulls_and_New_Jersey_Nets%2C_March_28%2C_1991.jpg
-  $PYTHON_EXECUTABLE -m executorch.examples.models.llava.image_util --image-path basketball.jpg --output-path image.pt
 }
 
 run_and_verify() {
@@ -121,20 +119,18 @@ run_and_verify() {
     echo "Export failed. Abort"
     exit 1
   fi
-  if [[ ! -f "image.pt" ]]; then
-    echo "image.pt is missing."
+  if [[ ! -f "basketball.jpg" ]]; then
+    echo "basketball.jpg is missing."
     exit 1
   fi
   if [[ ! -f "tokenizer.bin" ]]; then
     echo "tokenizer.bin is missing."
     exit 1
   fi
 
-
-
   RUNTIME_ARGS="--model_path=llava.pte \
   --tokenizer_path=tokenizer.bin \
-  --image_path=image.pt \
+  --image_path=basketball.jpg \
   --prompt=ASSISTANT: \
   --temperature=0 \
   --seq_len=650"
@@ -149,13 +145,8 @@
 
   # verify result.txt
   RESULT=$(cat result.txt)
-  # set the expected prefix to be the same as prompt because there's a bug in sdpa_with_kv_cache that causes <unk> tokens.
-  if [[ "$(uname)" == "Darwin" ]]; then
-    EXPECTED_PREFIX="ASSISTANT: image captures a basketball game in progress, with several players on the court. One of the players is dribbling the ball, while the others are in various"
-  else
-    # set the expected prefix to be the same as prompt because there's a bug in sdpa_with_kv_cache that causes <unk> tokens.
-    EXPECTED_PREFIX="ASSISTANT: image"
-  fi
+  EXPECTED_PREFIX="ASSISTANT: image captures a basketball game in progress, with several players on the court. "
+
   if [[ "${RESULT}" == *"${EXPECTED_PREFIX}"* ]]; then
     echo "Expected result prefix: ${EXPECTED_PREFIX}"
     echo "Actual result: ${RESULT}"
@@ -184,5 +175,5 @@ fi
 export_llava
 
 # Step3. Run
-prepare_image_tensor
+download_image
 run_and_verify
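
As a usage note, a minimal sketch of how the updated runner would be invoked after this change; the runner binary path is an assumption, and the arguments mirror RUNTIME_ARGS above, now pointing at the raw JPEG instead of a preprocessed image.pt tensor:

# Hypothetical invocation; binary path assumed, flags taken from RUNTIME_ARGS above.
cmake-out/examples/models/llava/llava_main \
  --model_path=llava.pte \
  --tokenizer_path=tokenizer.bin \
  --image_path=basketball.jpg \
  --prompt="ASSISTANT:" \
  --temperature=0 \
  --seq_len=650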

.ci/scripts/test_model.sh

Lines changed: 1 addition & 5 deletions
@@ -87,10 +87,6 @@ test_model() {
     bash examples/models/llava/install_requirements.sh
     STRICT="--no-strict"
   fi
-  if [[ "$MODEL_NAME" == "llama3_2_vision_encoder" || "$MODEL_NAME" == "llama3_2_text_decoder" ]]; then
-    # Install requirements for llama vision.
-    bash examples/models/llama3_2_vision/install_requirements.sh
-  fi
   if [[ "${MODEL_NAME}" == "qwen2_5" ]]; then
     # Install requirements for export_llama
     bash examples/models/llama/install_requirements.sh
@@ -205,7 +201,7 @@ test_model_with_qnn() {
   # TODO(guangyang): Make QNN chipset matches the target device
   QNN_CHIPSET=SM8450
 
-  "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --compile_only $EXTRA_FLAGS
+  "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --ci --compile_only $EXTRA_FLAGS
   EXPORTED_MODEL=$(find "./${EXPORT_SCRIPT}" -type f -name "${MODEL_NAME}*.pte" -print -quit)
 }
 

.ci/scripts/unittest-linux.sh

Lines changed: 0 additions & 3 deletions
@@ -24,9 +24,6 @@ if [[ "$BUILD_TOOL" == "cmake" ]]; then
   CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \
   .ci/scripts/setup-linux.sh "$@"
 
-  # Install llama3_2_vision dependencies.
-  PYTHON_EXECUTABLE=python ./examples/models/llama3_2_vision/install_requirements.sh
-
   .ci/scripts/unittest-linux-cmake.sh
 elif [[ "$BUILD_TOOL" == "buck2" ]]; then
   # Removing this breaks sccache in the Buck build, apparently

.ci/scripts/unittest-macos.sh

Lines changed: 0 additions & 1 deletion
@@ -29,7 +29,6 @@ if [[ "$BUILD_TOOL" == "cmake" ]]; then
   # Install llama3_2_vision dependencies.
   PYTHON_EXECUTABLE=python \
   ${CONDA_RUN} --no-capture-output \
-  ./examples/models/llama3_2_vision/install_requirements.sh
 
   .ci/scripts/unittest-macos-cmake.sh
 elif [[ "$BUILD_TOOL" == "buck2" ]]; then

.github/scripts/label_utils.py

Lines changed: 4 additions & 4 deletions
@@ -22,12 +22,12 @@
 
 LABEL_ERR_MSG_TITLE = "This PR needs a `release notes:` label"
 LABEL_ERR_MSG = f"""# {LABEL_ERR_MSG_TITLE}
-If your changes are user facing and intended to be a part of release notes, please use a label starting with `release notes:`.
+If your change should be included in the release notes (i.e. would users of this library care about this change?), please use a label starting with `release notes:`.
 
-If not, please add the `topic: not user facing` label.
+If not, please add the `release notes: none` label.
 
 To add a label, you can comment to pytorchbot, for example
-`@pytorchbot label "topic: not user facing"`
+`@pytorchbot label "release notes: none"`
 
 For more information, see
 https://github.com/pytorch/pytorch/wiki/PyTorch-AutoLabel-Bot#why-categorize-for-release-notes-and-how-does-it-work.
@@ -115,7 +115,7 @@ def has_required_labels(pr: "GitHubPR") -> bool:
     pr_labels = pr.get_labels()
     # Check if PR is not user facing
     is_not_user_facing_pr = any(
-        label.strip() == "topic: not user facing" for label in pr_labels
+        label.strip() == "release notes: none" for label in pr_labels
    )
    return is_not_user_facing_pr or any(
        label.strip() in get_release_notes_labels(pr.org, pr.project)
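
For reference, either of the following applies the new label to a PR. The pytorchbot comment is the mechanism documented in the updated message; the gh invocation is an assumption (the commit itself does not mention it), and the PR number is illustrative:

# Via pytorchbot, as documented in the updated error message:
#   @pytorchbot label "release notes: none"
# Or, hypothetically, via the GitHub CLI:
gh pr edit 12345 --add-label "release notes: none"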

.github/workflows/_link_check.yml

Lines changed: 30 additions & 14 deletions
@@ -7,35 +7,51 @@ on:
 
 jobs:
   lint-urls:
+    if: ${{ github.event_name != 'pull_request' || !contains(github.event.pull_request.labels.*.name, 'skip-url-lint') }}
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-linter
-      submodules: 'none'
+      submodules: false
       fetch-depth: 0
       ref: ${{ inputs.ref }}
-      timeout: 90
+      timeout: 120
       script: |
         ./scripts/lint_urls.sh $(
-          [ "${{ github.event_name }}" = "pull_request" ] \
-            && git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} \
-            || [ "${{ github.event_name }}" = "push" ] \
-            && git diff --name-only ${{ github.event.before }} ${{ github.sha }}
-        )
+          { [ "${{ github.event_name }}" = "pull_request" ] \
+            && git diff --name-only "${{ github.event.pull_request.base.sha }}...${{ github.event.pull_request.head.sha }}"; } \
+          || \
+          { [ "${{ github.event_name }}" = "push" ] \
+            && git diff --name-only "${{ github.event.before }}...${{ github.sha }}"; }
+        ) || {
+          echo
+          echo "URL lint failed."
+          echo "If this is a transient outage, you can bypass it by adding the \`skip-url-lint\` label to your PR."
+          echo "Or add \`@lint-ignore\` somewhere on the same line as the URL you want to skip checking."
+          exit 1
+        }
 
   lint-xrefs:
+    if: ${{ github.event_name != 'pull_request' || !contains(github.event.pull_request.labels.*.name, 'skip-xref-lint') }}
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-linter
-      submodules: 'none'
+      submodules: false
       fetch-depth: 0
       ref: ${{ inputs.ref }}
-      timeout: 90
+      timeout: 60
       script: |
         ./scripts/lint_xrefs.sh $(
-          [ "${{ github.event_name }}" = "pull_request" ] \
-            && git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} \
-            || [ "${{ github.event_name }}" = "push" ] \
-            && git diff --name-only ${{ github.event.before }} ${{ github.sha }}
-        )
+          { [ "${{ github.event_name }}" = "pull_request" ] \
+            && git diff --name-only "${{ github.event.pull_request.base.sha }}...${{ github.event.pull_request.head.sha }}"; } \
+          || \
+          { [ "${{ github.event_name }}" = "push" ] \
+            && git diff --name-only "${{ github.event.before }}...${{ github.sha }}"; }
+        ) || {
+          echo
+          echo "Xref lint failed."
+          echo "If this is a transient outage, you can bypass it by adding the \`skip-xref-lint\` label to your PR."
+          echo "Or add \`@lint-ignore\` somewhere on the same line as the reference you want to skip checking."
+          exit 1
+        }
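
The new commands also switch the diff range from two commit arguments to git's three-dot form. A small sketch of the difference, with an assumed branch layout (origin/main and a feature branch forked from it):

# Two-argument form: compares the two commits directly, so files changed on
# main after the fork point also show up in the output.
git diff --name-only origin/main my-feature

# Three-dot form (used above): compares the merge-base of the two refs against
# my-feature, i.e. only the files this branch itself touched.
git diff --name-only origin/main...my-feature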

.github/workflows/apple.yml

Lines changed: 2 additions & 0 deletions
@@ -5,6 +5,8 @@ on:
     branches:
       - main
       - release/*
+    tags:
+      - ciflow/trunk/*
   pull_request:
     paths:
       - .ci/scripts/setup-ios.sh
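
The added tags trigger means this workflow now also runs when a tag matching ciflow/trunk/* is pushed. A hedged example of how such a tag is typically created; the PR number is illustrative, and in the PyTorch CI convention these tags are usually pushed by the bot when the ciflow/trunk label is applied:

# Hypothetical manual trigger via a ciflow tag:
git tag ciflow/trunk/12345
git push origin ciflow/trunk/12345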

.github/workflows/build-presets.yml

Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
+name: Build Presets
+
+on:
+  pull_request:
+  push:
+    branches:
+      - main
+      - release/*
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
+  cancel-in-progress: true
+
+jobs:
+  apple:
+    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+    strategy:
+      matrix:
+        preset: [macos-arm64]
+    with:
+      job-name: build
+      runner: macos-latest-xlarge
+      python-version: 3.12
+      submodules: recursive
+      script: |
+        set -eux
+        ${CONDA_RUN} ./install_requirements.sh > /dev/null
+        ${CONDA_RUN} cmake --preset ${{ matrix.preset }}
+        ${CONDA_RUN} cmake --build cmake-out --parallel
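
For local reproduction, the job's script reduces to roughly the following, assuming the repository defines the macos-arm64 preset that the matrix references (e.g. in CMakePresets.json):

# Sketch of the same steps outside CI; the CONDA_RUN wrapper is omitted.
./install_requirements.sh
cmake --preset macos-arm64
cmake --build cmake-out --parallel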

.github/workflows/pull.yml

Lines changed: 1 addition & 3 deletions
@@ -434,9 +434,7 @@ jobs:
 output=$(ls -la cmake-out/test/size_test)
 arr=($output)
 size=${arr[4]}
-# threshold=48120 on devserver with gcc11.4
-# todo(lfq): update once binary size is below 50kb.
-threshold="47552"
+threshold="47560"
 if [[ "$size" -le "$threshold" ]]; then
   echo "Success $size <= $threshold"
 else
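
The surrounding check reads the binary size out of the ls output and gates it on the new threshold. A hedged sketch of the full comparison; the failure branch is an assumption, since the diff context cuts off at else:

# Sketch of the size gate; the else branch is assumed.
output=$(ls -la cmake-out/test/size_test)
arr=($output)      # word-split the ls line into fields
size=${arr[4]}     # field 5 of `ls -la` is the size in bytes
threshold="47560"
if [[ "$size" -le "$threshold" ]]; then
  echo "Success $size <= $threshold"
else
  echo "Fail $size > $threshold"
  exit 1
fi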

.github/workflows/trunk.yml

Lines changed: 68 additions & 10 deletions
@@ -555,11 +555,11 @@ jobs:
     strategy:
       matrix:
         hf_model_id: [
-          google/gemma-2-2b,
-          Qwen/Qwen2.5-0.5B,
+          google/gemma-3-1b-it,
+          Qwen/Qwen3-0.6B,
           HuggingFaceTB/SmolLM2-135M,
           meta-llama/Llama-3.2-1B,
-          allenai/OLMo-1B-hf
+          allenai/OLMo-1B-hf,
         ]
       fail-fast: false
     with:
@@ -569,44 +569,102 @@ jobs:
       submodules: 'recursive'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       timeout: 90
+      upload-artifact: profiling-artifacts-${{ strategy.job-index }}
       script: |
         echo "::group::Set up ExecuTorch"
         # The generic Linux job chooses to use base env, not the one setup by the image
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
         conda activate "${CONDA_ENV}"
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
+        # Build executor_runner with ETdump enabled
+        PYTHON_EXECUTABLE=python cmake -DPYTHON_EXECUTABLE=python \
+          -DCMAKE_INSTALL_PREFIX=cmake-out \
+          -DEXECUTORCH_ENABLE_LOGGING=1 \
+          -DCMAKE_BUILD_TYPE=Release \
+          -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+          -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+          -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
+          -DEXECUTORCH_BUILD_XNNPACK=ON \
+          -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+          -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+          -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+          -DEXECUTORCH_BUILD_DEVTOOLS=ON \
+          -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
+          -Bcmake-out .
+        cmake --build cmake-out -j16 --target install --config Release
         echo "::endgroup::"
 
         echo "::group::Set up Hugging Face"
         pip install -U "huggingface_hub[cli]"
         huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
         git clone https://github.com/huggingface/optimum-executorch
-        cd optimum-executorch
+        pushd optimum-executorch
         # There is no release yet, for CI stability, always test from the same commit on main
-        git checkout 577a2b19670e4c643a5c6ecb09bf47b9a699e7c6
+        git checkout da80c9e35b3db5c7eea8731b7d660482fb4870a8
         pip install .[tests]
+        popd
+
+        if [ "${{ matrix.hf_model_id }}" == "google/gemma-3-1b-it" ]; then
+          # Fixes for gemma-3 is not available in the released version
+          git clone https://github.com/huggingface/transformers.git
+          pushd transformers
+          git checkout a57274466f7f72efaa2662d1738cdaf28ae8071f
+          pip install -e .
+          popd
+        fi
         pip list
         echo "::endgroup::"
 
-        echo "::group::Export and Run ${{ matrix.hf_model_id }}"
+        echo "::group::Export to ExecuTorch"
         # Pass matrix variable as environment variable
         export MODEL_ID="${{ matrix.hf_model_id }}"
+        export OUTPUT_DIR="$(pwd)/${MODEL_ID}_custom_sdpa_8da4w"
+        pushd optimum-executorch
+
+        optimum-cli export executorch \
+          --model ${MODEL_ID} \
+          --task text-generation \
+          --recipe xnnpack \
+          --use_custom_sdpa \
+          --output_dir ${OUTPUT_DIR} \
+          --qlinear
+
+        ls -FlAGhp ${OUTPUT_DIR}
+        popd
+        echo "::endgroup::"
+
+        echo "::group::Inference using python API"
+        pushd optimum-executorch
         python -c "
         import os
         from optimum.executorch import ExecuTorchModelForCausalLM
         from transformers import AutoTokenizer
 
         model_id = os.getenv('MODEL_ID')
-        print(f'Loading model: {model_id}')
-        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe='xnnpack')
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        pte_dir = os.getenv('OUTPUT_DIR')
+        print(f'Loading model {model_id} from {pte_dir}.')
+        model = ExecuTorchModelForCausalLM.from_pretrained(pte_dir)
         generated_text = model.text_generation(
-            tokenizer=tokenizer,
+            tokenizer=AutoTokenizer.from_pretrained(model_id),
            prompt='Simply put, the theory of relativity states that',
            max_seq_len=64
        )
        print(generated_text)
        "
+        popd
+        echo "::endgroup::"
+
+        echo "::group::Inference using executor_runner with ETDump"
+        ./cmake-out/executor_runner \
+          --model_path ${OUTPUT_DIR}/model.pte \
+          --etdump_path ${OUTPUT_DIR}/etdump.etdp
+
+        export TSV_PATH=artifacts-to-be-uploaded/${MODEL_ID}_op_prof.tsv
+        mkdir -p $(dirname "$TSV_PATH")
+        python3 -m devtools.inspector.inspector_cli \
+          --etdump_path ${OUTPUT_DIR}/etdump.etdp \
+          --tsv_path ${TSV_PATH}
+
         echo "::endgroup::"
 
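Condensed, the new job exports each model with optimum-cli and then profiles it with executor_runner plus the inspector CLI. A hedged local equivalent follows; the model id is illustrative (taken from the matrix above), the TSV path is simplified, and all flags are copied from the workflow script:

# Illustrative model id; all flags come from the workflow script above.
export MODEL_ID="HuggingFaceTB/SmolLM2-135M"
export OUTPUT_DIR="$(pwd)/${MODEL_ID}_custom_sdpa_8da4w"

optimum-cli export executorch \
  --model ${MODEL_ID} \
  --task text-generation \
  --recipe xnnpack \
  --use_custom_sdpa \
  --qlinear \
  --output_dir ${OUTPUT_DIR}

# Run once with ETDump enabled, then turn the trace into a per-op TSV.
./cmake-out/executor_runner \
  --model_path ${OUTPUT_DIR}/model.pte \
  --etdump_path ${OUTPUT_DIR}/etdump.etdp

python3 -m devtools.inspector.inspector_cli \
  --etdump_path ${OUTPUT_DIR}/etdump.etdp \
  --tsv_path op_prof.tsv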
