vLLM Benchmark #390
name: vLLM Benchmark

on:
  schedule:
    # Run every 2 hours
    - cron: '0 */2 * * *'
  workflow_dispatch:
    inputs:
      vllm_branch:
        description: vLLM branch
        required: true
        type: string
        default: main
      vllm_commit:
        description: vLLM commit
        required: false
        type: string
      models:
        description: |
          A comma-separated list of models to benchmark; leave empty to run everything
        required: false
        type: string
      gpus:
        description: |
          A comma-separated list of GPUs to benchmark, e.g. h100, mi300
        required: true
        type: string
        default: h100,mi300
  pull_request:
    paths:
      - .github/workflows/vllm-benchmark.yml
      - vllm-benchmarks/**
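
# The event-name flags in the group key keep scheduled, manually dispatched,
# and pull-request runs from cancelling one another; only runs of the same
# kind on the same ref/PR are deduplicated.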
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true

jobs:
  set-parameters:
    runs-on: ubuntu-latest
    outputs:
      benchmark_matrix: ${{ steps.set-parameters.outputs.benchmark_matrix }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'
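
      # Illustrative only -- the exact schema comes from the script below, but
      # given the matrix.runner and matrix.models references later on, the
      # matrix consumed by fromJson() is assumed to look roughly like:
      #   benchmark_matrix={"include": [{"runner": "<runner label>", "models": "<model name>"}, ...]}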
      - name: Set parameters
        id: set-parameters
        shell: bash
        env:
          MODELS: ${{ inputs.models || '' }}
          GPUS: ${{ inputs.gpus || '' }}
        run: |
          set -eux

          # The generated matrix is grouped by model and runner
          python .github/scripts/generate_vllm_benchmark_matrix.py \
            --benchmark-configs-dir vllm-benchmarks/benchmarks \
            --models "${MODELS}" \
            --gpus "${GPUS}"
  benchmarks:
    name: Run vLLM benchmarks
    needs: set-parameters
    strategy:
      matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_matrix) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
    environment: pytorch-x-vllm
    permissions:
      id-token: write
      contents: read
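      # id-token: write enables OIDC, which aws-actions/configure-aws-credentials
      # uses below to assume the upload role without long-lived credentials.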
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Checkout vLLM repository
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          path: vllm-benchmarks/vllm
          ref: ${{ inputs.vllm_branch || 'main' }}
          fetch-depth: 0

      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'
          cache: 'pip'

      - name: Check if the device is supported
        shell: bash
        run: |
          set -eux

          if command -v nvidia-smi; then
            DEVICE_NAME=cuda
            nvidia-smi
          elif command -v rocm-smi; then
            DEVICE_NAME=rocm
            rocm-smi
          else
            echo "Only CUDA and ROCm benchmarks are supported at the moment"
            exit 1
          fi
          echo "DEVICE_NAME=$DEVICE_NAME" >> "${GITHUB_ENV}"
      - name: Set GPU name and type
        working-directory: vllm-benchmarks
        shell: bash
        run: |
          set -eux

          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
            DEVICE_TYPE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
            DEVICE_TYPE=$(rocminfo | grep "Marketing Name" | tail -n1 | awk -F':' '{print $2}' | xargs)
          fi
          echo "DEVICE_TYPE=$DEVICE_TYPE" >> "${GITHUB_ENV}"

      - name: Install dependencies
        shell: bash
        run: |
          set -eux

          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
            pip install -r .github/scripts/requirements.txt
          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
            pip install -r .github/scripts/requirements.txt \
              --extra-index-url https://download.pytorch.org/whl/rocm6.3
          fi
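
      # CUDA images are pulled from vLLM's post-merge CI registry on ECR and
      # ROCm images from Docker Hub; both are tagged with a vLLM commit SHA,
      # which the walk-back loop below depends on.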
      - name: Set Docker registry
        shell: bash
        run: |
          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
            DOCKER_IMAGE_PREFIX=public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo
          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
            DOCKER_IMAGE_PREFIX=docker.io/rocm/vllm-ci
          fi
          echo "DOCKER_IMAGE_PREFIX=$DOCKER_IMAGE_PREFIX" >> "${GITHUB_ENV}"
      - name: Check for last benchmark commit
        working-directory: vllm-benchmarks
        env:
          HEAD_BRANCH: ${{ inputs.vllm_branch || 'main' }}
          HEAD_SHA: ${{ inputs.vllm_commit || '' }}
          MODELS: ${{ matrix.models }}
        run: |
          set -eux

          if [[ -z "${HEAD_SHA}" ]]; then
            pushd vllm
            # Looking back over the latest 100 commits is enough
            for i in {0..99}
            do
              # Check if the image is there; if it isn't, move on to an older
              # commit, because this one is too recent
              HEAD_SHA=$(git rev-parse --verify HEAD~${i})
              DOCKER_IMAGE="${DOCKER_IMAGE_PREFIX}:${HEAD_SHA}"

              # No Docker image available yet because the commit is too recent
              if ! docker manifest inspect "${DOCKER_IMAGE}"; then
                continue
              fi

              NOT_EXIST=0
              S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${DEVICE_TYPE// /_}/benchmark_results_${MODELS//\//_}.json"
              aws s3api head-object --bucket ossci-benchmarks --key "${S3_PATH}" || NOT_EXIST=1

              if [[ ${NOT_EXIST} == "1" ]]; then
                echo "Found a vLLM commit ${HEAD_SHA} that hasn't been benchmarked yet"
                break
              fi
            done
            popd
          fi

          echo "HEAD_SHA=$HEAD_SHA" >> "${GITHUB_ENV}"
      - name: Setup CUDA GPU_FLAG for docker run
        if: env.DEVICE_NAME == 'cuda'
        run: |
          echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"

      - name: Setup ROCm
        if: env.DEVICE_NAME == 'rocm'
        uses: pytorch/pytorch/./.github/actions/setup-rocm@main

      - name: Setup SCCACHE_SERVER_PORT environment for docker run when on container
        run: |
          echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))" >> "${GITHUB_ENV}"
      - name: Setup benchmark tests
        env:
          MODELS: ${{ matrix.models }}
        run: |
          set -eux

          pushd vllm-benchmarks/vllm
          git checkout "${HEAD_SHA}"
          rm .buildkite/nightly-benchmarks/tests/*.json
          popd

          # Set the list of benchmarks we want to cover in this runner
          python .github/scripts/setup_vllm_benchmark.py \
            --from-benchmark-configs-dir vllm-benchmarks/benchmarks \
            --to-benchmark-configs-dir vllm-benchmarks/vllm/.buildkite/nightly-benchmarks/tests \
            --models "${MODELS}"

          pushd vllm-benchmarks/vllm
          ls -lah .buildkite/nightly-benchmarks/tests
          find .buildkite/nightly-benchmarks/tests -type f -exec cat {} \;
          popd
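
      # --ipc=host shares the host's shared-memory segment with the container,
      # which PyTorch needs for inter-process tensor sharing; seccomp is
      # relaxed presumably to accommodate profiling inside the benchmarks.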
      - name: Run vLLM benchmark
        env:
          SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
          SCCACHE_REGION: us-east-1
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          DOCKER_IMAGE: ${{ env.DOCKER_IMAGE_PREFIX }}:${{ env.HEAD_SHA }}
          # vLLM-related environment variables
          ENGINE_VERSION: v1
          SAVE_TO_PYTORCH_BENCHMARK_FORMAT: 1
        run: |
          set -x
          docker run \
            ${GPU_FLAG:-} \
            ${SCCACHE_SERVER_PORT_DOCKER_FLAG:-} \
            -e SCCACHE_BUCKET \
            -e SCCACHE_REGION \
            -e DEVICE_NAME \
            -e DEVICE_TYPE \
            -e HF_TOKEN \
            -e ENGINE_VERSION \
            -e SAVE_TO_PYTORCH_BENCHMARK_FORMAT \
            --ipc=host \
            --tty \
            --security-opt seccomp=unconfined \
            -v "${GITHUB_WORKSPACE}:/tmp/workspace" \
            -w /tmp/workspace \
            "${DOCKER_IMAGE}" \
            bash -xc "cd vllm-benchmarks/vllm && bash .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh"

      - name: Authenticate with AWS
        # AWS CUDA runners already have access to the bucket via their runner IAM role
        if: env.DEVICE_NAME != 'cuda'
        uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
        with:
          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
          # The max duration enforced by the server side
          role-duration-seconds: 18000
          aws-region: us-east-1
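
      # The uploader keys results by device type and model (spaces and slashes
      # replaced with underscores), matching the S3 path probed by the
      # walk-back loop above, so the next scheduled run can skip covered commits.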
      - name: Upload the benchmark results
        env:
          BENCHMARK_RESULTS: vllm-benchmarks/vllm/benchmarks/results
          MODELS: ${{ matrix.models }}
        run: |
          set -eux

          sudo chown -R ${UID} "${BENCHMARK_RESULTS}"
          ls -lah "${BENCHMARK_RESULTS}"

          python .github/scripts/upload_benchmark_results.py \
            --repo vllm-benchmarks/vllm \
            --benchmark-name "vLLM benchmark" \
            --benchmark-results "${BENCHMARK_RESULTS}" \
            --device "${DEVICE_TYPE// /_}" \
            --model "${MODELS//\//_}"