Commit d7aa520

Merge branch 'main' into add-module-execute-input-count-check
2 parents: a7941df + 1b593ad

872 files changed: +51330 / -11126 lines


.buckconfig

Lines changed: 1 addition & 0 deletions
@@ -39,6 +39,7 @@
 
 [buck2]
 restarter=true
+file_watcher=notify
 
 [oss]
 folly_cxx_tests = False

.ci/docker/ci_commit_pins/buck2.txt

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-2024-12-16
+2025-05-06

.ci/scripts/build-qnn-sdk.sh

Lines changed: 1 addition & 2 deletions
@@ -39,8 +39,7 @@ set_up_aot() {
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
     -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
-    -DPYTHON_EXECUTABLE=python3 \
-    -DEXECUTORCH_SEPARATE_FLATCC_HOST_PROJECT=OFF
+    -DPYTHON_EXECUTABLE=python3
   cmake --build $PWD --target "PyQnnManagerAdaptor" "PyQnnWrapperAdaptor" -j$(nproc)
   # install Python APIs to correct import path
   # The filename might vary depending on your Python and host version.

.ci/scripts/test_llava.sh

Lines changed: 9 additions & 18 deletions
@@ -93,8 +93,7 @@ cmake_build_llava_runner_for_android() {
     -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
     -DANDROID_ABI=arm64-v8a \
     ${LLAVA_COMMON_CMAKE_ARGS} \
-    -DCMAKE_PREFIX_PATH="$python_lib" \
-    -DLLAVA_RUNNER_NO_TORCH_DUMMY_IMAGE=ON \
+    -DCMAKE_PREFIX_PATH="$python_lib" \
     -B${BUILD_DIR}/${dir} \
     ${dir}
 
@@ -107,11 +106,10 @@ export_llava() {
   $PYTHON_EXECUTABLE -m executorch.examples.models.llava.export_llava --pte-name llava.pte --with-artifacts
 }
 
-# Download a new image with different size, to test if the model can handle different image sizes
-prepare_image_tensor() {
+# Download a new image
+download_image() {
   echo "Downloading image"
   curl -o basketball.jpg https://upload.wikimedia.org/wikipedia/commons/7/73/Chicago_Bulls_and_New_Jersey_Nets%2C_March_28%2C_1991.jpg
-  $PYTHON_EXECUTABLE -m executorch.examples.models.llava.image_util --image-path basketball.jpg --output-path image.pt
 }
 
 run_and_verify() {
@@ -121,20 +119,18 @@ run_and_verify() {
     echo "Export failed. Abort"
     exit 1
   fi
-  if [[ ! -f "image.pt" ]]; then
-    echo "image.pt is missing."
+  if [[ ! -f "basketball.jpg" ]]; then
+    echo "basketball.jpg is missing."
     exit 1
   fi
   if [[ ! -f "tokenizer.bin" ]]; then
     echo "tokenizer.bin is missing."
     exit 1
   fi
 
-
-
   RUNTIME_ARGS="--model_path=llava.pte \
   --tokenizer_path=tokenizer.bin \
-  --image_path=image.pt \
+  --image_path=basketball.jpg \
   --prompt=ASSISTANT: \
   --temperature=0 \
   --seq_len=650"
@@ -149,13 +145,8 @@
 
   # verify result.txt
   RESULT=$(cat result.txt)
-  # set the expected prefix to be the same as prompt because there's a bug in sdpa_with_kv_cache that causes <unk> tokens.
-  if [[ "$(uname)" == "Darwin" ]]; then
-    EXPECTED_PREFIX="ASSISTANT: image captures a basketball game in progress, with several players on the court. One of the players is dribbling the ball, while the others are in various"
-  else
-    # set the expected prefix to be the same as prompt because there's a bug in sdpa_with_kv_cache that causes <unk> tokens.
-    EXPECTED_PREFIX="ASSISTANT: image"
-  fi
+  EXPECTED_PREFIX="ASSISTANT: image captures a basketball game in progress, with several players on the court. "
+
   if [[ "${RESULT}" == *"${EXPECTED_PREFIX}"* ]]; then
     echo "Expected result prefix: ${EXPECTED_PREFIX}"
     echo "Actual result: ${RESULT}"
@@ -184,5 +175,5 @@ fi
 export_llava
 
 # Step3. Run
-prepare_image_tensor
+download_image
 run_and_verify
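
As a usage note, a minimal sketch of how the updated runner would be invoked after this change; the runner binary path is an assumption, and the arguments mirror RUNTIME_ARGS above, now pointing at the raw JPEG instead of a preprocessed image.pt tensor:

# Hypothetical invocation; binary path assumed, flags taken from RUNTIME_ARGS above.
cmake-out/examples/models/llava/llava_main \
  --model_path=llava.pte \
  --tokenizer_path=tokenizer.bin \
  --image_path=basketball.jpg \
  --prompt="ASSISTANT:" \
  --temperature=0 \
  --seq_len=650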

.ci/scripts/test_model.sh

Lines changed: 1 addition & 5 deletions
@@ -87,10 +87,6 @@ test_model() {
     bash examples/models/llava/install_requirements.sh
     STRICT="--no-strict"
   fi
-  if [[ "$MODEL_NAME" == "llama3_2_vision_encoder" || "$MODEL_NAME" == "llama3_2_text_decoder" ]]; then
-    # Install requirements for llama vision.
-    bash examples/models/llama3_2_vision/install_requirements.sh
-  fi
   if [[ "${MODEL_NAME}" == "qwen2_5" ]]; then
     # Install requirements for export_llama
     bash examples/models/llama/install_requirements.sh
@@ -205,7 +201,7 @@ test_model_with_qnn() {
   # TODO(guangyang): Make QNN chipset matches the target device
   QNN_CHIPSET=SM8450
 
-  "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --compile_only $EXTRA_FLAGS
+  "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --ci --compile_only $EXTRA_FLAGS
   EXPORTED_MODEL=$(find "./${EXPORT_SCRIPT}" -type f -name "${MODEL_NAME}*.pte" -print -quit)
 }
 

.ci/scripts/unittest-linux.sh

Lines changed: 0 additions & 3 deletions
@@ -24,9 +24,6 @@ if [[ "$BUILD_TOOL" == "cmake" ]]; then
   CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \
   .ci/scripts/setup-linux.sh "$@"
 
-  # Install llama3_2_vision dependencies.
-  PYTHON_EXECUTABLE=python ./examples/models/llama3_2_vision/install_requirements.sh
-
   .ci/scripts/unittest-linux-cmake.sh
 elif [[ "$BUILD_TOOL" == "buck2" ]]; then
   # Removing this breaks sccache in the Buck build, apparently

.ci/scripts/unittest-macos.sh

Lines changed: 0 additions & 1 deletion
@@ -29,7 +29,6 @@ if [[ "$BUILD_TOOL" == "cmake" ]]; then
   # Install llama3_2_vision dependencies.
   PYTHON_EXECUTABLE=python \
   ${CONDA_RUN} --no-capture-output \
-  ./examples/models/llama3_2_vision/install_requirements.sh
 
   .ci/scripts/unittest-macos-cmake.sh
 elif [[ "$BUILD_TOOL" == "buck2" ]]; then

.github/scripts/label_utils.py

Lines changed: 4 additions & 4 deletions
@@ -22,12 +22,12 @@
 
 LABEL_ERR_MSG_TITLE = "This PR needs a `release notes:` label"
 LABEL_ERR_MSG = f"""# {LABEL_ERR_MSG_TITLE}
-If your changes are user facing and intended to be a part of release notes, please use a label starting with `release notes:`.
+If your change should be included in the release notes (i.e. would users of this library care about this change?), please use a label starting with `release notes:`.
 
-If not, please add the `topic: not user facing` label.
+If not, please add the `release notes: none` label.
 
 To add a label, you can comment to pytorchbot, for example
-`@pytorchbot label "topic: not user facing"`
+`@pytorchbot label "release notes: none"`
 
 For more information, see
 https://github.com/pytorch/pytorch/wiki/PyTorch-AutoLabel-Bot#why-categorize-for-release-notes-and-how-does-it-work.
@@ -115,7 +115,7 @@ def has_required_labels(pr: "GitHubPR") -> bool:
     pr_labels = pr.get_labels()
     # Check if PR is not user facing
     is_not_user_facing_pr = any(
-        label.strip() == "topic: not user facing" for label in pr_labels
+        label.strip() == "release notes: none" for label in pr_labels
    )
    return is_not_user_facing_pr or any(
        label.strip() in get_release_notes_labels(pr.org, pr.project)
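
For reference, either of the following applies the new label to a PR. The pytorchbot comment is the mechanism documented in the updated message; the gh invocation is an assumption (the commit itself does not mention it), and the PR number is illustrative:

# Via pytorchbot, as documented in the updated error message:
#   @pytorchbot label "release notes: none"
# Or, hypothetically, via the GitHub CLI:
gh pr edit 12345 --add-label "release notes: none"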

.github/workflows/_link_check.yml

Lines changed: 30 additions & 14 deletions
@@ -7,35 +7,51 @@ on:
 
 jobs:
   lint-urls:
+    if: ${{ github.event_name != 'pull_request' || !contains(github.event.pull_request.labels.*.name, 'skip-url-lint') }}
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-linter
-      submodules: 'none'
+      submodules: false
       fetch-depth: 0
       ref: ${{ inputs.ref }}
-      timeout: 90
+      timeout: 120
       script: |
         ./scripts/lint_urls.sh $(
-          [ "${{ github.event_name }}" = "pull_request" ] \
-            && git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} \
-            || [ "${{ github.event_name }}" = "push" ] \
-            && git diff --name-only ${{ github.event.before }} ${{ github.sha }}
-        )
+          { [ "${{ github.event_name }}" = "pull_request" ] \
+            && git diff --name-only "${{ github.event.pull_request.base.sha }}...${{ github.event.pull_request.head.sha }}"; } \
+          || \
+          { [ "${{ github.event_name }}" = "push" ] \
+            && git diff --name-only "${{ github.event.before }}...${{ github.sha }}"; }
+        ) || {
+          echo
+          echo "URL lint failed."
+          echo "If this is a transient outage, you can bypass it by adding the \`skip-url-lint\` label to your PR."
+          echo "Or add \`@lint-ignore\` somewhere on the same line as the URL you want to skip checking."
+          exit 1
+        }
 
   lint-xrefs:
+    if: ${{ github.event_name != 'pull_request' || !contains(github.event.pull_request.labels.*.name, 'skip-xref-lint') }}
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-linter
-      submodules: 'none'
+      submodules: false
       fetch-depth: 0
       ref: ${{ inputs.ref }}
-      timeout: 90
+      timeout: 60
       script: |
         ./scripts/lint_xrefs.sh $(
-          [ "${{ github.event_name }}" = "pull_request" ] \
-            && git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} \
-            || [ "${{ github.event_name }}" = "push" ] \
-            && git diff --name-only ${{ github.event.before }} ${{ github.sha }}
-        )
+          { [ "${{ github.event_name }}" = "pull_request" ] \
+            && git diff --name-only "${{ github.event.pull_request.base.sha }}...${{ github.event.pull_request.head.sha }}"; } \
+          || \
+          { [ "${{ github.event_name }}" = "push" ] \
+            && git diff --name-only "${{ github.event.before }}...${{ github.sha }}"; }
+        ) || {
+          echo
+          echo "Xref lint failed."
+          echo "If this is a transient outage, you can bypass it by adding the \`skip-xref-lint\` label to your PR."
+          echo "Or add \`@lint-ignore\` somewhere on the same line as the reference you want to skip checking."
+          exit 1
+        }
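
The new commands also switch the diff range from two commit arguments to git's three-dot form. A small sketch of the difference, with an assumed branch layout (origin/main and a feature branch forked from it):

# Two-argument form: compares the two commits directly, so files changed on
# main after the fork point also show up in the output.
git diff --name-only origin/main my-feature

# Three-dot form (used above): compares the merge-base of the two refs against
# my-feature, i.e. only the files this branch itself touched.
git diff --name-only origin/main...my-feature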

.github/workflows/apple.yml

Lines changed: 2 additions & 0 deletions
@@ -5,6 +5,8 @@ on:
     branches:
       - main
       - release/*
+    tags:
+      - ciflow/trunk/*
   pull_request:
     paths:
       - .ci/scripts/setup-ios.sh
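
The added tags trigger means this workflow now also runs when a tag matching ciflow/trunk/* is pushed. A hedged example of how such a tag is typically created; the PR number is illustrative, and in the PyTorch CI convention these tags are usually pushed by the bot when the ciflow/trunk label is applied:

# Hypothetical manual trigger via a ciflow tag:
git tag ciflow/trunk/12345
git push origin ciflow/trunk/12345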

.github/workflows/build-presets.yml

Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
+name: Build Presets
+
+on:
+  pull_request:
+  push:
+    branches:
+      - main
+      - release/*
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
+  cancel-in-progress: true
+
+jobs:
+  apple:
+    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+    strategy:
+      matrix:
+        preset: [macos-arm64]
+    with:
+      job-name: build
+      runner: macos-latest-xlarge
+      python-version: 3.12
+      submodules: recursive
+      script: |
+        set -eux
+        ${CONDA_RUN} ./install_requirements.sh > /dev/null
+        ${CONDA_RUN} cmake --preset ${{ matrix.preset }}
+        ${CONDA_RUN} cmake --build cmake-out --parallel
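
For local reproduction, the job's script reduces to roughly the following, assuming the repository defines the macos-arm64 preset that the matrix references (e.g. in CMakePresets.json):

# Sketch of the same steps outside CI; the CONDA_RUN wrapper is omitted.
./install_requirements.sh
cmake --preset macos-arm64
cmake --build cmake-out --parallel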

.github/workflows/pull.yml

Lines changed: 1 addition & 3 deletions
@@ -434,9 +434,7 @@ jobs:
 output=$(ls -la cmake-out/test/size_test)
 arr=($output)
 size=${arr[4]}
-# threshold=48120 on devserver with gcc11.4
-# todo(lfq): update once binary size is below 50kb.
-threshold="47552"
+threshold="47560"
 if [[ "$size" -le "$threshold" ]]; then
   echo "Success $size <= $threshold"
 else
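
The surrounding check reads the binary size out of the ls output and gates it on the new threshold. A hedged sketch of the full comparison; the failure branch is an assumption, since the diff context cuts off at else:

# Sketch of the size gate; the else branch is assumed.
output=$(ls -la cmake-out/test/size_test)
arr=($output)      # word-split the ls line into fields
size=${arr[4]}     # field 5 of `ls -la` is the size in bytes
threshold="47560"
if [[ "$size" -le "$threshold" ]]; then
  echo "Success $size <= $threshold"
else
  echo "Fail $size > $threshold"
  exit 1
fi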

.github/workflows/trunk.yml

Lines changed: 68 additions & 10 deletions
@@ -555,11 +555,11 @@ jobs:
     strategy:
       matrix:
         hf_model_id: [
-          google/gemma-2-2b,
-          Qwen/Qwen2.5-0.5B,
+          google/gemma-3-1b-it,
+          Qwen/Qwen3-0.6B,
           HuggingFaceTB/SmolLM2-135M,
           meta-llama/Llama-3.2-1B,
-          allenai/OLMo-1B-hf
+          allenai/OLMo-1B-hf,
         ]
       fail-fast: false
     with:
@@ -569,44 +569,102 @@ jobs:
       submodules: 'recursive'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       timeout: 90
+      upload-artifact: profiling-artifacts-${{ strategy.job-index }}
       script: |
         echo "::group::Set up ExecuTorch"
         # The generic Linux job chooses to use base env, not the one setup by the image
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
         conda activate "${CONDA_ENV}"
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
+        # Build executor_runner with ETdump enabled
+        PYTHON_EXECUTABLE=python cmake -DPYTHON_EXECUTABLE=python \
+          -DCMAKE_INSTALL_PREFIX=cmake-out \
+          -DEXECUTORCH_ENABLE_LOGGING=1 \
+          -DCMAKE_BUILD_TYPE=Release \
+          -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+          -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+          -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
+          -DEXECUTORCH_BUILD_XNNPACK=ON \
+          -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+          -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+          -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+          -DEXECUTORCH_BUILD_DEVTOOLS=ON \
+          -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
+          -Bcmake-out .
+        cmake --build cmake-out -j16 --target install --config Release
         echo "::endgroup::"
 
         echo "::group::Set up Hugging Face"
         pip install -U "huggingface_hub[cli]"
         huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
         git clone https://github.com/huggingface/optimum-executorch
-        cd optimum-executorch
+        pushd optimum-executorch
         # There is no release yet, for CI stability, always test from the same commit on main
-        git checkout 577a2b19670e4c643a5c6ecb09bf47b9a699e7c6
+        git checkout da80c9e35b3db5c7eea8731b7d660482fb4870a8
         pip install .[tests]
+        popd
+
+        if [ "${{ matrix.hf_model_id }}" == "google/gemma-3-1b-it" ]; then
+          # Fixes for gemma-3 is not available in the released version
+          git clone https://github.com/huggingface/transformers.git
+          pushd transformers
+          git checkout a57274466f7f72efaa2662d1738cdaf28ae8071f
+          pip install -e .
+          popd
+        fi
         pip list
         echo "::endgroup::"
 
-        echo "::group::Export and Run ${{ matrix.hf_model_id }}"
+        echo "::group::Export to ExecuTorch"
         # Pass matrix variable as environment variable
         export MODEL_ID="${{ matrix.hf_model_id }}"
+        export OUTPUT_DIR="$(pwd)/${MODEL_ID}_custom_sdpa_8da4w"
+        pushd optimum-executorch
+
+        optimum-cli export executorch \
+          --model ${MODEL_ID} \
+          --task text-generation \
+          --recipe xnnpack \
+          --use_custom_sdpa \
+          --output_dir ${OUTPUT_DIR} \
+          --qlinear
+
+        ls -FlAGhp ${OUTPUT_DIR}
+        popd
+        echo "::endgroup::"
+
+        echo "::group::Inference using python API"
+        pushd optimum-executorch
         python -c "
         import os
         from optimum.executorch import ExecuTorchModelForCausalLM
         from transformers import AutoTokenizer
 
         model_id = os.getenv('MODEL_ID')
-        print(f'Loading model: {model_id}')
-        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe='xnnpack')
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        pte_dir = os.getenv('OUTPUT_DIR')
+        print(f'Loading model {model_id} from {pte_dir}.')
+        model = ExecuTorchModelForCausalLM.from_pretrained(pte_dir)
         generated_text = model.text_generation(
-            tokenizer=tokenizer,
+            tokenizer=AutoTokenizer.from_pretrained(model_id),
            prompt='Simply put, the theory of relativity states that',
            max_seq_len=64
        )
        print(generated_text)
        "
+        popd
+        echo "::endgroup::"
+
+        echo "::group::Inference using executor_runner with ETDump"
+        ./cmake-out/executor_runner \
+          --model_path ${OUTPUT_DIR}/model.pte \
+          --etdump_path ${OUTPUT_DIR}/etdump.etdp
+
+        export TSV_PATH=artifacts-to-be-uploaded/${MODEL_ID}_op_prof.tsv
+        mkdir -p $(dirname "$TSV_PATH")
+        python3 -m devtools.inspector.inspector_cli \
+          --etdump_path ${OUTPUT_DIR}/etdump.etdp \
+          --tsv_path ${TSV_PATH}
+
         echo "::endgroup::"
 
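Condensed, the new job exports each model with optimum-cli and then profiles it with executor_runner plus the inspector CLI. A hedged local equivalent follows; the model id is illustrative (taken from the matrix above), the TSV path is simplified, and all flags are copied from the workflow script:

# Illustrative model id; all flags come from the workflow script above.
export MODEL_ID="HuggingFaceTB/SmolLM2-135M"
export OUTPUT_DIR="$(pwd)/${MODEL_ID}_custom_sdpa_8da4w"

optimum-cli export executorch \
  --model ${MODEL_ID} \
  --task text-generation \
  --recipe xnnpack \
  --use_custom_sdpa \
  --qlinear \
  --output_dir ${OUTPUT_DIR}

# Run once with ETDump enabled, then turn the trace into a per-op TSV.
./cmake-out/executor_runner \
  --model_path ${OUTPUT_DIR}/model.pte \
  --etdump_path ${OUTPUT_DIR}/etdump.etdp

python3 -m devtools.inspector.inspector_cli \
  --etdump_path ${OUTPUT_DIR}/etdump.etdp \
  --tsv_path op_prof.tsv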
