Commit

2024-10-09 nightly release (a6b213b)
pytorchbot committed Oct 9, 2024
1 parent 8865273 commit d71daf4
Showing 1,059 changed files with 14,328 additions and 11,074 deletions.
2 changes: 1 addition & 1 deletion .ci/docker/ci_commit_pins/pytorch.txt
@@ -1 +1 @@
aec9b2ab77389967ef39bb9c10662fd0fe3e185a
d1b87e26e5c4343f5b56bb1e6f89b479b389bfac
2 changes: 1 addition & 1 deletion .ci/docker/requirements-ci.txt
@@ -1,5 +1,5 @@
mpmath==1.3.0
numpy==1.21.3; python_version == '3.10'
numpy==1.22.0; python_version == '3.10'
numpy==1.23.2; python_version == '3.11'
numpy; python_version >= '3.12'
PyYAML==6.0.1
2 changes: 1 addition & 1 deletion .ci/scripts/test_llama.sh
@@ -213,7 +213,7 @@ echo "Creating tokenizer.bin"
$PYTHON_EXECUTABLE -m extension.llm.tokenizer.tokenizer -t tokenizer.model -o tokenizer.bin


RUNTIME_ARGS="--model_path=${EXPORTED_MODEL_NAME} --tokenizer_path=tokenizer.bin --prompt=Once --temperature=0 --seq_len=10"
RUNTIME_ARGS="--model_path=${EXPORTED_MODEL_NAME} --tokenizer_path=tokenizer.bin --prompt=Once --temperature=0 --seq_len=10 --warmup=1"
# Check build tool.
echo "Running ${EXPORTED_MODEL_NAME} in portable mode"
if [[ "${BUILD_TOOL}" == "buck2" ]]; then
250 changes: 250 additions & 0 deletions .github/scripts/extract_benchmark_results.py
@@ -0,0 +1,250 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import json
import logging
import os
import re
import time
import zipfile
from argparse import Action, ArgumentParser, Namespace
from io import BytesIO
from logging import info, warning
from typing import Any, List, Optional
from urllib import error, request


logging.basicConfig(level=logging.INFO)


BENCHMARK_RESULTS_FILENAME = "benchmark_results.json"
ARTIFACTS_FILENAME_REGEX = re.compile(r"(android|ios)-artifacts-(?P<job_id>\d+)\.json")


class ValidateArtifacts(Action):
def __call__(
self,
parser: ArgumentParser,
namespace: Namespace,
values: Any,
option_string: Optional[str] = None,
) -> None:
if os.path.isfile(values) and values.endswith(".json"):
setattr(namespace, self.dest, values)
return

parser.error(f"{values} is not a valid JSON file (*.json)")


class ValidateOutputDir(Action):
def __call__(
self,
parser: ArgumentParser,
namespace: Namespace,
values: Any,
option_string: Optional[str] = None,
) -> None:
if os.path.isdir(values):
setattr(namespace, self.dest, values)
return

parser.error(f"{values} is not a valid directory")


def parse_args() -> Any:
    parser = ArgumentParser(
        description="extract benchmark results from AWS Device Farm artifacts"
    )
parser.add_argument(
"--artifacts",
type=str,
required=True,
action=ValidateArtifacts,
help="the list of artifacts from AWS in JSON format",
)
parser.add_argument(
"--output-dir",
type=str,
required=True,
action=ValidateOutputDir,
help="the directory to keep the benchmark results",
)
parser.add_argument(
"--repo",
type=str,
required=True,
help="which GitHub repo this workflow run belongs to",
)
parser.add_argument(
"--head-branch",
type=str,
required=True,
help="the head branch that runs",
)
parser.add_argument(
"--workflow-name",
type=str,
required=True,
help="the name of the benchmark workflow",
)
parser.add_argument(
"--workflow-run-id",
type=int,
required=True,
help="the id of the benchmark workflow",
)
parser.add_argument(
"--workflow-run-attempt",
type=int,
required=True,
help="which retry of the workflow this is",
)

return parser.parse_args()


def extract_android_benchmark_results(
job_name: str, artifact_type: str, artifact_s3_url: str
) -> List:
"""
    The benchmark results from Android are already stored in the CUSTOMER_ARTIFACT
    archive, so we just need to download and read them.

    Return the list of benchmark results.
"""
if artifact_type != "CUSTOMER_ARTIFACT":
return []

try:
with request.urlopen(artifact_s3_url) as data:
with zipfile.ZipFile(BytesIO(data.read())) as customer_artifact:
for name in customer_artifact.namelist():
if BENCHMARK_RESULTS_FILENAME in name:
return json.loads(customer_artifact.read(name))

except error.HTTPError:
warning(f"Fail to {artifact_type} {artifact_s3_url}")
return []
except json.decoder.JSONDecodeError:
        # This handles the case where there are no benchmark results in the artifact
        warning(f"Failed to load the benchmark results from {artifact_s3_url}")
return []


def extract_job_id(artifacts_filename: str) -> int:
"""
Extract the job id from the artifacts filename
"""
m = ARTIFACTS_FILENAME_REGEX.match(os.path.basename(artifacts_filename))
if not m:
return 0
return int(m.group("job_id"))


def transform(
app_type: str,
benchmark_results: List,
repo: str,
head_branch: str,
workflow_name: str,
workflow_run_id: int,
workflow_run_attempt: int,
job_name: str,
job_id: int,
) -> List:
"""
    Transform the benchmark results into the format expected by the benchmark database
"""
    # Overwrite the device name here with the job name as it carries more information
    # about the device, e.g. Samsung Galaxy S22 5G instead of just Samsung
for r in benchmark_results:
r["deviceInfo"]["device"] = job_name

# TODO (huydhn): This is the current schema of the database oss_ci_benchmark_v2,
# and I'm trying to fit ET benchmark results into it, which is kind of awkward.
# However, the schema is going to be updated soon
return [
{
# GH-info to identify where the benchmark is run
"repo": repo,
"head_branch": head_branch,
"workflow_id": workflow_run_id,
"run_attempt": workflow_run_attempt,
"job_id": job_id,
# The model
"name": f"{r['benchmarkModel']['name']} {r['benchmarkModel'].get('backend', '')}".strip(),
"dtype": (
r["benchmarkModel"]["quantization"]
if r["benchmarkModel"]["quantization"]
else "unknown"
),
# The metric value
"metric": r["metric"],
"actual": r["actualValue"],
"target": r["targetValue"],
# The device
"device": r["deviceInfo"]["device"],
"arch": r["deviceInfo"].get("os", ""),
            # Not used here, just set to something unique
"filename": workflow_name,
"test_name": app_type,
"runner": job_name,
}
for r in benchmark_results
]


def main() -> None:
args = parse_args()

# Across all devices
all_benchmark_results = []

with open(args.artifacts) as f:
for artifact in json.load(f):
app_type = artifact.get("app_type", "")
# We expect this to be set to either ANDROID_APP or IOS_APP
            if app_type not in ["ANDROID_APP", "IOS_APP"]:
info(
f"App type {app_type} is not recognized in artifact {json.dumps(artifact)}"
)
continue

job_name = artifact["job_name"]
artifact_type = artifact["type"]
artifact_s3_url = artifact["s3_url"]

if app_type == "ANDROID_APP":
benchmark_results = extract_android_benchmark_results(
job_name, artifact_type, artifact_s3_url
)
if benchmark_results:
benchmark_results = transform(
app_type,
benchmark_results,
args.repo,
args.head_branch,
args.workflow_name,
args.workflow_run_id,
args.workflow_run_attempt,
job_name,
extract_job_id(args.artifacts),
)
all_benchmark_results.extend(benchmark_results)

if app_type == "IOS_APP":
# TODO (huydhn): Implement the logic for iOS next
pass

if all_benchmark_results:
output_file = os.path.basename(args.artifacts)
with open(f"{args.output_dir}/{output_file}", "w") as f:
json.dump(all_benchmark_results, f)


if __name__ == "__main__":
main()
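
For context, here is a minimal sketch of the data shapes this script works with, inferred from the fields it reads and writes above; every concrete value below is hypothetical and only illustrates the layout.

# Hypothetical sample data illustrating the shapes used by extract_benchmark_results.py.
# Field names come from the script above; the values are made up for illustration.

# One entry of the --artifacts JSON list downloaded from AWS Device Farm:
artifact = {
    "app_type": "ANDROID_APP",            # ANDROID_APP or IOS_APP
    "job_name": "Samsung Galaxy S22 5G",  # used to overwrite deviceInfo.device
    "type": "CUSTOMER_ARTIFACT",          # only CUSTOMER_ARTIFACT archives carry results
    "s3_url": "https://example.com/artifact.zip",  # placeholder URL
}

# One entry of benchmark_results.json found inside the CUSTOMER_ARTIFACT zip:
benchmark_result = {
    "benchmarkModel": {"name": "llama2", "backend": "xnnpack", "quantization": "8da4w"},
    "metric": "token_per_sec",
    "actualValue": 10.5,
    "targetValue": 0.0,
    "deviceInfo": {"device": "Samsung", "os": "Android 13"},
}

# After transform(), each record written to the output JSON looks roughly like this:
transformed = {
    "repo": "pytorch/executorch",
    "head_branch": "main",
    "workflow_id": 11111111111,
    "run_attempt": 1,
    "job_id": 22222222,
    "name": "llama2 xnnpack",
    "dtype": "8da4w",
    "metric": "token_per_sec",
    "actual": 10.5,
    "target": 0.0,
    "device": "Samsung Galaxy S22 5G",
    "arch": "Android 13",
    "filename": "android-perf",
    "test_name": "ANDROID_APP",
    "runner": "Samsung Galaxy S22 5G",
}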
85 changes: 78 additions & 7 deletions .github/workflows/android-perf.yml
@@ -176,8 +176,8 @@ jobs:
fi
echo "::endgroup::"
build-llm-demo:
name: build-llm-demo
build-benchmark-app:
name: build-benchmark-app
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
needs: set-parameters
with:
@@ -211,7 +211,7 @@
uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
needs:
- set-parameters
- build-llm-demo
- build-benchmark-app
- export-models
strategy:
matrix:
@@ -228,13 +228,84 @@
# This is the ARN of ExecuTorch project on AWS
project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6
device-pool-arn: ${{ matrix.device }}
      # Uploaded to S3 from the previous job, the name of the app comes from the project itself.
      # Unlike models, there is only a limited number of build flavors for apps, and the model controls whether it should be built with the bpe/tiktoken tokenizer.
      # It's okay to build all possible apps with all possible flavors in the "build-llm-demo" job. However, in this job, once a model is given, there is only
      # one app+flavor that can load and run the model.
android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug.apk
android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug-androidTest.apk
# NB: Need to set the default spec here so that it works for periodic too
test-spec: ${{ inputs.test_spec || 'https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml' }}
# Uploaded to S3 from the previous job
extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip

upload-benchmark-results:
needs:
- benchmark-on-device
if: always()
runs-on: linux.2xlarge
environment: upload-benchmark-results
permissions:
id-token: write
contents: read
steps:
- uses: actions/checkout@v3
with:
submodules: false

- name: Authenticate with AWS
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
# The max duration enforced by the server side
role-duration-seconds: 18000
aws-region: us-east-1

- name: Setup conda
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
with:
python-version: '3.10'

- name: Download the list of artifacts from S3
env:
ARTIFACTS_S3_DIR: s3://gha-artifacts/device_farm/${{ github.run_id }}/${{ github.run_attempt }}/artifacts/
shell: bash
run: |
set -eux
${CONDA_RUN} python -mpip install awscli==1.32.18
mkdir -p artifacts
pushd artifacts
${CONDA_RUN} aws s3 sync "${ARTIFACTS_S3_DIR}" .
popd
ls -lah artifacts
- name: Extract the benchmark results JSON
shell: bash
run: |
set -eux
mkdir -p benchmark-results
for ARTIFACTS_BY_JOB in artifacts/*.json; do
[ -f "${ARTIFACTS_BY_JOB}" ] || break
echo "${ARTIFACTS_BY_JOB}"
${CONDA_RUN} python .github/scripts/extract_benchmark_results.py \
--artifacts "${ARTIFACTS_BY_JOB}" \
--output-dir benchmark-results \
--repo ${{ github.repository }} \
--head-branch ${{ github.head_ref || github.ref_name }} \
--workflow-name "${{ github.workflow }}" \
--workflow-run-id ${{ github.run_id }} \
--workflow-run-attempt ${{ github.run_attempt }}
done
ls -lah benchmark-results
for BENCHMARK_RESULTS in benchmark-results/*.json; do
cat "${BENCHMARK_RESULTS}"
echo
done
- name: Upload the benchmark results
uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
with:
benchmark-results-dir: 'benchmark-results'
dry-run: false
1 change: 1 addition & 0 deletions .github/workflows/android.yml
@@ -15,6 +15,7 @@ on:
- install_requirements.sh
- examples/demo-apps/android/**
- extension/android/**
- extension/benchmark/android/**
- extension/module/**
workflow_dispatch:
