Commit

2024-10-09 nightly release (a6b213b)
pytorchbot committed Oct 9, 2024
1 parent 8865273 commit d71daf4
Showing 1,059 changed files with 14,328 additions and 11,074 deletions.
2 changes: 1 addition & 1 deletion .ci/docker/ci_commit_pins/pytorch.txt
@@ -1 +1 @@
aec9b2ab77389967ef39bb9c10662fd0fe3e185a
d1b87e26e5c4343f5b56bb1e6f89b479b389bfac
2 changes: 1 addition & 1 deletion .ci/docker/requirements-ci.txt
@@ -1,5 +1,5 @@
mpmath==1.3.0
numpy==1.21.3; python_version == '3.10'
numpy==1.22.0; python_version == '3.10'
numpy==1.23.2; python_version == '3.11'
numpy; python_version >= '3.12'
PyYAML==6.0.1
2 changes: 1 addition & 1 deletion .ci/scripts/test_llama.sh
@@ -213,7 +213,7 @@ echo "Creating tokenizer.bin"
$PYTHON_EXECUTABLE -m extension.llm.tokenizer.tokenizer -t tokenizer.model -o tokenizer.bin


RUNTIME_ARGS="--model_path=${EXPORTED_MODEL_NAME} --tokenizer_path=tokenizer.bin --prompt=Once --temperature=0 --seq_len=10"
RUNTIME_ARGS="--model_path=${EXPORTED_MODEL_NAME} --tokenizer_path=tokenizer.bin --prompt=Once --temperature=0 --seq_len=10 --warmup=1"
# Check build tool.
echo "Running ${EXPORTED_MODEL_NAME} in portable mode"
if [[ "${BUILD_TOOL}" == "buck2" ]]; then
250 changes: 250 additions & 0 deletions .github/scripts/extract_benchmark_results.py
@@ -0,0 +1,250 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import json
import logging
import os
import re
import time
import zipfile
from argparse import Action, ArgumentParser, Namespace
from io import BytesIO
from logging import info, warning
from typing import Any, List, Optional
from urllib import error, request


logging.basicConfig(level=logging.INFO)


BENCHMARK_RESULTS_FILENAME = "benchmark_results.json"
ARTIFACTS_FILENAME_REGEX = re.compile(r"(android|ios)-artifacts-(?P<job_id>\d+)\.json")


class ValidateArtifacts(Action):
def __call__(
self,
parser: ArgumentParser,
namespace: Namespace,
values: Any,
option_string: Optional[str] = None,
) -> None:
if os.path.isfile(values) and values.endswith(".json"):
setattr(namespace, self.dest, values)
return

parser.error(f"{values} is not a valid JSON file (*.json)")


class ValidateOutputDir(Action):
def __call__(
self,
parser: ArgumentParser,
namespace: Namespace,
values: Any,
option_string: Optional[str] = None,
) -> None:
if os.path.isdir(values):
setattr(namespace, self.dest, values)
return

parser.error(f"{values} is not a valid directory")


def parse_args() -> Any:
    parser = ArgumentParser(
        description="extract benchmark results from AWS Device Farm artifacts"
    )
parser.add_argument(
"--artifacts",
type=str,
required=True,
action=ValidateArtifacts,
help="the list of artifacts from AWS in JSON format",
)
parser.add_argument(
"--output-dir",
type=str,
required=True,
action=ValidateOutputDir,
help="the directory to keep the benchmark results",
)
parser.add_argument(
"--repo",
type=str,
required=True,
help="which GitHub repo this workflow run belongs to",
)
parser.add_argument(
"--head-branch",
type=str,
required=True,
help="the head branch that runs",
)
parser.add_argument(
"--workflow-name",
type=str,
required=True,
help="the name of the benchmark workflow",
)
parser.add_argument(
"--workflow-run-id",
type=int,
required=True,
help="the id of the benchmark workflow",
)
parser.add_argument(
"--workflow-run-attempt",
type=int,
required=True,
help="which retry of the workflow this is",
)

return parser.parse_args()


def extract_android_benchmark_results(
job_name: str, artifact_type: str, artifact_s3_url: str
) -> List:
"""
    The benchmark results from Android are already stored in the CUSTOMER_ARTIFACT
    archive, so we just need to download and read them.

    Return the list of benchmark results.
"""
if artifact_type != "CUSTOMER_ARTIFACT":
return []

try:
with request.urlopen(artifact_s3_url) as data:
with zipfile.ZipFile(BytesIO(data.read())) as customer_artifact:
for name in customer_artifact.namelist():
if BENCHMARK_RESULTS_FILENAME in name:
return json.loads(customer_artifact.read(name))

except error.HTTPError:
warning(f"Fail to {artifact_type} {artifact_s3_url}")
return []
except json.decoder.JSONDecodeError:
        # This handles the case where there are no benchmark results in the artifact
        warning(f"Failed to load the benchmark results from {artifact_s3_url}")
return []


def extract_job_id(artifacts_filename: str) -> int:
"""
Extract the job id from the artifacts filename
"""
m = ARTIFACTS_FILENAME_REGEX.match(os.path.basename(artifacts_filename))
if not m:
return 0
return int(m.group("job_id"))


def transform(
app_type: str,
benchmark_results: List,
repo: str,
head_branch: str,
workflow_name: str,
workflow_run_id: int,
workflow_run_attempt: int,
job_name: str,
job_id: int,
) -> List:
"""
    Transform the benchmark results into the format expected by the benchmark database
"""
    # Overwrite the device name here with the job name as it carries more information
    # about the device, e.g. Samsung Galaxy S22 5G instead of just Samsung
for r in benchmark_results:
r["deviceInfo"]["device"] = job_name

# TODO (huydhn): This is the current schema of the database oss_ci_benchmark_v2,
# and I'm trying to fit ET benchmark results into it, which is kind of awkward.
# However, the schema is going to be updated soon
return [
{
# GH-info to identify where the benchmark is run
"repo": repo,
"head_branch": head_branch,
"workflow_id": workflow_run_id,
"run_attempt": workflow_run_attempt,
"job_id": job_id,
# The model
"name": f"{r['benchmarkModel']['name']} {r['benchmarkModel'].get('backend', '')}".strip(),
"dtype": (
r["benchmarkModel"]["quantization"]
if r["benchmarkModel"]["quantization"]
else "unknown"
),
# The metric value
"metric": r["metric"],
"actual": r["actualValue"],
"target": r["targetValue"],
# The device
"device": r["deviceInfo"]["device"],
"arch": r["deviceInfo"].get("os", ""),
            # Not used here, just set to something unique
"filename": workflow_name,
"test_name": app_type,
"runner": job_name,
}
for r in benchmark_results
]


def main() -> None:
args = parse_args()

# Across all devices
all_benchmark_results = []

with open(args.artifacts) as f:
for artifact in json.load(f):
app_type = artifact.get("app_type", "")
# We expect this to be set to either ANDROID_APP or IOS_APP
            if app_type not in ["ANDROID_APP", "IOS_APP"]:
info(
f"App type {app_type} is not recognized in artifact {json.dumps(artifact)}"
)
continue

job_name = artifact["job_name"]
artifact_type = artifact["type"]
artifact_s3_url = artifact["s3_url"]

if app_type == "ANDROID_APP":
benchmark_results = extract_android_benchmark_results(
job_name, artifact_type, artifact_s3_url
)
if benchmark_results:
benchmark_results = transform(
app_type,
benchmark_results,
args.repo,
args.head_branch,
args.workflow_name,
args.workflow_run_id,
args.workflow_run_attempt,
job_name,
extract_job_id(args.artifacts),
)
all_benchmark_results.extend(benchmark_results)

if app_type == "IOS_APP":
# TODO (huydhn): Implement the logic for iOS next
pass

if all_benchmark_results:
output_file = os.path.basename(args.artifacts)
with open(f"{args.output_dir}/{output_file}", "w") as f:
json.dump(all_benchmark_results, f)


if __name__ == "__main__":
main()
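
For context, here is a minimal sketch of the data shapes this script works with, inferred from the fields it reads and writes above; every concrete value below is hypothetical and only illustrates the layout.

# Hypothetical sample data illustrating the shapes used by extract_benchmark_results.py.
# Field names come from the script above; the values are made up for illustration.

# One entry of the --artifacts JSON list downloaded from AWS Device Farm:
artifact = {
    "app_type": "ANDROID_APP",            # ANDROID_APP or IOS_APP
    "job_name": "Samsung Galaxy S22 5G",  # used to overwrite deviceInfo.device
    "type": "CUSTOMER_ARTIFACT",          # only CUSTOMER_ARTIFACT archives carry results
    "s3_url": "https://example.com/artifact.zip",  # placeholder URL
}

# One entry of benchmark_results.json found inside the CUSTOMER_ARTIFACT zip:
benchmark_result = {
    "benchmarkModel": {"name": "llama2", "backend": "xnnpack", "quantization": "8da4w"},
    "metric": "token_per_sec",
    "actualValue": 10.5,
    "targetValue": 0.0,
    "deviceInfo": {"device": "Samsung", "os": "Android 13"},
}

# After transform(), each record written to the output JSON looks roughly like this:
transformed = {
    "repo": "pytorch/executorch",
    "head_branch": "main",
    "workflow_id": 11111111111,
    "run_attempt": 1,
    "job_id": 22222222,
    "name": "llama2 xnnpack",
    "dtype": "8da4w",
    "metric": "token_per_sec",
    "actual": 10.5,
    "target": 0.0,
    "device": "Samsung Galaxy S22 5G",
    "arch": "Android 13",
    "filename": "android-perf",
    "test_name": "ANDROID_APP",
    "runner": "Samsung Galaxy S22 5G",
}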
85 changes: 78 additions & 7 deletions .github/workflows/android-perf.yml
@@ -176,8 +176,8 @@ jobs:
fi
echo "::endgroup::"
build-llm-demo:
name: build-llm-demo
build-benchmark-app:
name: build-benchmark-app
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
needs: set-parameters
with:
@@ -211,7 +211,7 @@
uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
needs:
- set-parameters
- build-llm-demo
- build-benchmark-app
- export-models
strategy:
matrix:
@@ -228,13 +228,84 @@
# This is the ARN of ExecuTorch project on AWS
project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6
device-pool-arn: ${{ matrix.device }}
      # Uploaded to S3 from the previous job, the name of the app comes from the project itself.
      # Unlike models, there is only a limited number of build flavors for apps, and the model controls whether it should be built with the bpe/tiktoken tokenizer.
      # It's okay to build all possible apps with all possible flavors in the "build-llm-demo" job. However, in this job, once a model is given, there is only
      # one app+flavor that can load and run the model.
android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug.apk
android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug-androidTest.apk
# NB: Need to set the default spec here so that it works for periodic too
test-spec: ${{ inputs.test_spec || 'https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml' }}
# Uploaded to S3 from the previous job
extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip

upload-benchmark-results:
needs:
- benchmark-on-device
if: always()
runs-on: linux.2xlarge
environment: upload-benchmark-results
permissions:
id-token: write
contents: read
steps:
- uses: actions/checkout@v3
with:
submodules: false

- name: Authenticate with AWS
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
# The max duration enforced by the server side
role-duration-seconds: 18000
aws-region: us-east-1

- name: Setup conda
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
with:
python-version: '3.10'

- name: Download the list of artifacts from S3
env:
ARTIFACTS_S3_DIR: s3://gha-artifacts/device_farm/${{ github.run_id }}/${{ github.run_attempt }}/artifacts/
shell: bash
run: |
set -eux
${CONDA_RUN} python -mpip install awscli==1.32.18
mkdir -p artifacts
pushd artifacts
${CONDA_RUN} aws s3 sync "${ARTIFACTS_S3_DIR}" .
popd
ls -lah artifacts
- name: Extract the benchmark results JSON
shell: bash
run: |
set -eux
mkdir -p benchmark-results
for ARTIFACTS_BY_JOB in artifacts/*.json; do
[ -f "${ARTIFACTS_BY_JOB}" ] || break
echo "${ARTIFACTS_BY_JOB}"
${CONDA_RUN} python .github/scripts/extract_benchmark_results.py \
--artifacts "${ARTIFACTS_BY_JOB}" \
--output-dir benchmark-results \
--repo ${{ github.repository }} \
--head-branch ${{ github.head_ref || github.ref_name }} \
--workflow-name "${{ github.workflow }}" \
--workflow-run-id ${{ github.run_id }} \
--workflow-run-attempt ${{ github.run_attempt }}
done
ls -lah benchmark-results
for BENCHMARK_RESULTS in benchmark-results/*.json; do
cat "${BENCHMARK_RESULTS}"
echo
done
- name: Upload the benchmark results
uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
with:
benchmark-results-dir: 'benchmark-results'
dry-run: false
1 change: 1 addition & 0 deletions .github/workflows/android.yml
@@ -15,6 +15,7 @@ on:
- install_requirements.sh
- examples/demo-apps/android/**
- extension/android/**
- extension/benchmark/android/**
- extension/module/**
workflow_dispatch:
