name: vLLM Benchmark
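
# A manual run against a specific branch and GPU pool can be triggered with
# the GitHub CLI, for example (illustrative values):
#
#   gh workflow run vllm-benchmark.yml -f vllm_branch=main -f gpus=h100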
on:
  schedule:
    # Run every 2 hours
    - cron: '0 */2 * * *'
  workflow_dispatch:
    inputs:
      vllm_branch:
        description: vLLM branch
        required: true
        type: string
        default: main
      vllm_commit:
        description: vLLM commit
        required: false
        type: string
      models:
        description: |
          A comma-separated list of models to benchmark; leave empty to run everything
        required: false
        type: string
      gpus:
        description: |
          A comma-separated list of GPUs to benchmark, e.g. h100, mi300
        required: true
        type: string
        default: h100,mi300
  pull_request:
    paths:
      - .github/workflows/vllm-benchmark.yml
      - vllm-benchmarks/**
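
# One concurrency group per workflow + PR (or commit) + trigger type: a newer
# run cancels an in-progress run of the same kind, while scheduled, manual,
# and pull_request runs never cancel each other.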
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true
jobs:
  set-parameters:
    runs-on: ubuntu-latest
    outputs:
      benchmark_matrix: ${{ steps.set-parameters.outputs.benchmark_matrix }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Set parameters
        id: set-parameters
        shell: bash
        env:
          MODELS: ${{ inputs.models || '' }}
          GPUS: ${{ inputs.gpus || '' }}
        run: |
          set -eux

          # The generated matrix is grouped by model and runner
          python .github/scripts/generate_vllm_benchmark_matrix.py \
            --benchmark-configs-dir vllm-benchmarks/benchmarks \
            --models "${MODELS}" \
            --gpus "${GPUS}"
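
  # Fan out one job per matrix entry (a group of models on a specific runner);
  # fail-fast is disabled so one failing combination does not cancel the rest.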
  benchmarks:
    name: Run vLLM benchmarks
    needs: set-parameters
    strategy:
      matrix: ${{ fromJson(needs.set-parameters.outputs.benchmark_matrix) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
    environment: pytorch-x-vllm
    permissions:
      id-token: write
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Checkout vLLM repository
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          path: vllm-benchmarks/vllm
          ref: ${{ inputs.vllm_branch || 'main' }}
          fetch-depth: 0

      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'
          cache: 'pip'
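
      # DEVICE_NAME is exported through GITHUB_ENV so that every later step
      # (dependency install, registry selection, docker flags) can branch on it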
      - name: Check if the device is supported
        shell: bash
        run: |
          set -eux
          if command -v nvidia-smi; then
            DEVICE_NAME=cuda
            nvidia-smi
          elif command -v rocm-smi; then
            DEVICE_NAME=rocm
            rocm-smi
          else
            echo "Only CUDA and ROCm benchmarks are supported at the moment"
            exit 1
          fi
          echo "DEVICE_NAME=$DEVICE_NAME" >> $GITHUB_ENV
      - name: Set GPU name and type
        working-directory: vllm-benchmarks
        shell: bash
        run: |
          set -eux
          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
            # Keep only the model token, e.g. "NVIDIA H100 80GB HBM3" -> "H100"
            DEVICE_TYPE=$(nvidia-smi -i 0 --query-gpu=name --format=csv,noheader | awk '{print $2}')
          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
            DEVICE_TYPE=$(rocminfo | grep "Marketing Name" | tail -n1 | awk -F':' '{print $2}' | xargs)
          fi
          echo "DEVICE_TYPE=$DEVICE_TYPE" >> $GITHUB_ENV
      - name: Install dependencies
        shell: bash
        run: |
          set -eux
          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
            pip install -r .github/scripts/requirements.txt
          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
            # ROCm builds of the dependencies come from the PyTorch ROCm wheel index
            pip install -r .github/scripts/requirements.txt \
              --extra-index-url https://download.pytorch.org/whl/rocm6.3
          fi
      - name: Set Docker registry
        shell: bash
        run: |
          if [[ "${DEVICE_NAME}" == "cuda" ]]; then
            DOCKER_IMAGE_PREFIX=public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo
          elif [[ "${DEVICE_NAME}" == "rocm" ]]; then
            DOCKER_IMAGE_PREFIX=docker.io/rocm/vllm-ci
          fi
          echo "DOCKER_IMAGE_PREFIX=$DOCKER_IMAGE_PREFIX" >> $GITHUB_ENV
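
      # When no commit is pinned via the vllm_commit input, walk back through
      # recent vLLM commits and pick the newest one that already has a
      # published Docker image but no benchmark results in S3 yet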
      - name: Check for last benchmark commit
        working-directory: vllm-benchmarks
        env:
          HEAD_BRANCH: ${{ inputs.vllm_branch || 'main' }}
          HEAD_SHA: ${{ inputs.vllm_commit || '' }}
          MODELS: ${{ matrix.models }}
        run: |
          set -eux
          if [[ -z "${HEAD_SHA}" ]]; then
            pushd vllm
            # Looking back at the latest 100 commits is enough
            for i in {0..99}
            do
              # Check if the image is there; if it isn't, try an older commit,
              # because this one is probably too recent
              HEAD_SHA=$(git rev-parse --verify HEAD~${i})
              DOCKER_IMAGE="${DOCKER_IMAGE_PREFIX}:${HEAD_SHA}"
              # No Docker image available yet because the commit is too recent
              if ! docker manifest inspect "${DOCKER_IMAGE}"; then
                continue
              fi

              NOT_EXIST=0
              S3_PATH="v3/vllm-project/vllm/${HEAD_BRANCH}/${HEAD_SHA}/${DEVICE_TYPE// /_}/benchmark_results_${MODELS//\//_}.json"
              aws s3api head-object --bucket ossci-benchmarks --key ${S3_PATH} || NOT_EXIST=1

              if [[ ${NOT_EXIST} == "1" ]]; then
                echo "Found a vLLM commit ${HEAD_SHA} that hasn't been benchmarked yet"
                break
              fi
            done
            popd
          fi
          echo "HEAD_SHA=$HEAD_SHA" >> $GITHUB_ENV
      - name: Setup CUDA GPU_FLAG for docker run
        if: env.DEVICE_NAME == 'cuda'
        run: |
          echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"

      - name: Setup ROCm
        if: env.DEVICE_NAME == 'rocm'
        uses: pytorch/pytorch/./.github/actions/setup-rocm@main
      - name: Setup SCCACHE_SERVER_PORT environment for docker run when running in a container
        run: |
          # Presumably a per-runner offset from sccache's default port (4226),
          # so that runners sharing a host don't contend for the same port
          echo "SCCACHE_SERVER_PORT_DOCKER_FLAG=-e SCCACHE_SERVER_PORT=$((RUNNER_UID + 4226))" >> "${GITHUB_ENV}"
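
      # Swap the benchmark configs shipped with vLLM for the ones kept in this
      # repo, filtered down to the models assigned to this runner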
      - name: Setup benchmark tests
        env:
          MODELS: ${{ matrix.models }}
        run: |
          set -eux
          pushd vllm-benchmarks/vllm
          git checkout "${HEAD_SHA}"
          rm .buildkite/nightly-benchmarks/tests/*.json
          popd

          # Set the list of benchmarks we want to cover in this runner
          python .github/scripts/setup_vllm_benchmark.py \
            --from-benchmark-configs-dir vllm-benchmarks/benchmarks \
            --to-benchmark-configs-dir vllm-benchmarks/vllm/.buildkite/nightly-benchmarks/tests \
            --models "${MODELS}"

          pushd vllm-benchmarks/vllm
          ls -lah .buildkite/nightly-benchmarks/tests
          find .buildkite/nightly-benchmarks/tests -type f -exec cat {} \;
          popd
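
      # The benchmark itself runs inside vLLM's prebuilt per-commit Docker
      # image using vLLM's own nightly-benchmarks harness; results land under
      # the mounted workspace, where the upload step below picks them up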
      - name: Run vLLM benchmark
        env:
          SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
          SCCACHE_REGION: us-east-1
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          DOCKER_IMAGE: ${{ env.DOCKER_IMAGE_PREFIX }}:${{ env.HEAD_SHA }}
          # vLLM-related environment variables
          ENGINE_VERSION: v1
          SAVE_TO_PYTORCH_BENCHMARK_FORMAT: 1
        run: |
          set -x
          docker run \
            ${GPU_FLAG:-} \
            ${SCCACHE_SERVER_PORT_DOCKER_FLAG:-} \
            -e SCCACHE_BUCKET \
            -e SCCACHE_REGION \
            -e DEVICE_NAME \
            -e DEVICE_TYPE \
            -e HF_TOKEN \
            -e ENGINE_VERSION \
            -e SAVE_TO_PYTORCH_BENCHMARK_FORMAT \
            --ipc=host \
            --tty \
            --security-opt seccomp=unconfined \
            -v "${GITHUB_WORKSPACE}:/tmp/workspace" \
            -w /tmp/workspace \
            "${DOCKER_IMAGE}" \
            bash -xc "cd vllm-benchmarks/vllm && bash .buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh"
      - name: Authenticate with AWS
        # AWS CUDA runners already have access to the bucket via their runner IAM role
        if: env.DEVICE_NAME != 'cuda'
        uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
        with:
          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
          # The maximum duration enforced on the server side
          role-duration-seconds: 18000
          aws-region: us-east-1
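
      # Device and model names are sanitized (spaces and slashes replaced with
      # underscores) to match the S3 key probed in the discovery step above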
      - name: Upload the benchmark results
        env:
          BENCHMARK_RESULTS: vllm-benchmarks/vllm/benchmarks/results
          MODELS: ${{ matrix.models }}
        run: |
          set -eux
          sudo chown -R ${UID} "${BENCHMARK_RESULTS}"
          ls -lah "${BENCHMARK_RESULTS}"

          python .github/scripts/upload_benchmark_results.py \
            --repo vllm-benchmarks/vllm \
            --benchmark-name "vLLM benchmark" \
            --benchmark-results "${BENCHMARK_RESULTS}" \
            --device "${DEVICE_TYPE// /_}" \
            --model "${MODELS//\//_}"