This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

GHA Benchmark : Automatic benchmarking on manual trigger (#46)
Summary:
Add a benchmarking workflow and a composite action that run the benchmarks on a
manual trigger.

Test:
Try it locally.
Successful GHA Benchmark Run -
https://github.com/neuralmagic/neuralmagic-vllm/actions/runs/8019392326

---------

Co-authored-by: varun <varun@varuns-MacBook-Pro.local>
Co-authored-by: Varun Sundar Rabindranath <varun@neuralmagic.com>
3 people authored Feb 27, 2024
1 parent 820c992 commit 8b95d03
Showing 10 changed files with 130 additions and 32 deletions.
15 changes: 14 additions & 1 deletion .github/actions/nm-benchmark/action.yml
@@ -1,17 +1,30 @@
name: run vllm benchmarks
description: 'run vllm benchmarks'
inputs:
benchmark_config_list_file:
description: 'Path to a file containing a list of benchmark-configs to run benchmarks with. For reference look at .github/data/nm_benchmark_configs_list.txt'
required: true
output_directory:
description: 'output directory to store the benchmark results'
required: true
python:
description: 'python version, e.g. 3.10.12'
required: true
venv:
description: 'name for python virtual environment'
required: true
runs:
using: composite
steps:
- id: benchmark
run: |
mkdir -p ${{ inputs.output_directory }}
COMMIT=${{ github.sha }}
VENV="${{ inputs.venv }}-${COMMIT:0:7}"
source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
pip3 install -r neuralmagic/benchmarks/requirements-benchmark.txt
SUCCESS=0
.github/workflows/scripts/nm-run-benchmarks.sh ${{ inputs.output_directory }} || SUCCESS=$?
.github/workflows/scripts/nm-run-benchmarks.sh ${{ inputs.benchmark_config_list_file }} ${{ inputs.output_directory }} || SUCCESS=$?
echo "test=${SUCCESS}" >> "$GITHUB_OUTPUT"
exit ${SUCCESS}
shell: bash
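
For reference, the action's run step can be approximated locally with the sketch below. It assumes a pyenv install of Python 3.10.12 and an existing virtual environment named TEST-<short-sha>, matching the workflow's venv: TEST input; these names are assumptions for illustration, not part of the commit.

  # rough local equivalent of the benchmark step above (environment names assumed)
  mkdir -p benchmark-results
  COMMIT=$(git rev-parse HEAD)
  source "$(pyenv root)/versions/3.10.12/envs/TEST-${COMMIT:0:7}/bin/activate"
  pip3 install -r neuralmagic/benchmarks/requirements-benchmark.txt
  .github/workflows/scripts/nm-run-benchmarks.sh .github/data/nm_benchmark_configs_list.txt benchmark-results
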
2 changes: 2 additions & 0 deletions .github/data/nm_benchmark_configs_list.txt
@@ -0,0 +1,2 @@
neuralmagic/benchmarks/configs/benchmark_serving.json
neuralmagic/benchmarks/configs/benchmark_throughput.json
39 changes: 33 additions & 6 deletions .github/workflows/nm-benchmark.yml
@@ -6,6 +6,10 @@ on:
description: "requested runner label (specifies instance)"
type: string
required: true
benchmark_config_list_file:
description: "Path to a file containing a list of benchmark-configs to run benchmarks with. For reference look at .github/data/nm_benchmark_configs_list.txt"
type: string
required: true
timeout:
description: "approximate number of minutes to keep instance up (should be at least 20)."
type: string
@@ -14,6 +18,10 @@ on:
description: "git commit hash or branch name"
type: string
required: true
python:
description: "python version, e.g. 3.10.12"
type: string
required: true

jobs:
BENCHMARK:
@@ -35,24 +43,43 @@ jobs:
gitref: ${{ inputs.gitref }}
label: ${{ inputs.label }}
timeout: ${{ inputs.timeout }}

# Call the `build` action when available
#- name: build
# id: build
# uses: ./.github/actions/build/

- name: setenv
id: setenv
uses: ./.github/actions/nm-set-env/
with:
hf_home: ${{ secrets.NM_HF_HOME }}

- name: set python
id: set_python
uses: ./.github/actions/nm-set-python/
with:
python: ${{ inputs.python }}
venv: TEST

- name: build
id: build
uses: ./.github/actions/nm-build-vllm/
with:
Gi_per_thread: 1
python: ${{ inputs.python }}
venv: TEST

- name: run benchmarks
uses: ./.github/actions/nm-benchmark/
with:
benchmark_config_list_file: ${{ inputs.benchmark_config_list_file }}
output_directory: benchmark-results
python: ${{ inputs.python }}
venv: TEST

- name: store benchmark result artifacts
uses: actions/upload-artifact@v4
if: success() || failure()
with:
name: ${{ github.run_id }}-${{ inputs.label }}
path: benchmark-results
retention-days: 90
retention-days: 15

#######################################################
# TODO (Varun) : Remove pause once things are automated
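
Since the workflow runs only on a manual trigger, a run would be started from the Actions tab or, hypothetically, with the GitHub CLI. The input names below are taken from the workflow definition above; the runner label, timeout, and gitref values are placeholders.

  # hypothetical manual dispatch via gh; replace placeholder values with real ones
  gh workflow run nm-benchmark.yml \
      -f label=<runner-label> \
      -f benchmark_config_list_file=.github/data/nm_benchmark_configs_list.txt \
      -f timeout=30 \
      -f gitref=main \
      -f python=3.10.12
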
21 changes: 14 additions & 7 deletions .github/workflows/scripts/nm-run-benchmarks.sh
mode changed from 100644 to 100755 (made executable)
@@ -1,15 +1,22 @@
#!/bin/bash
# GHA uses this script to run benchmarks.

set -e
set -u

if [ $# -ne 1 ];
if [ $# -ne 2 ];
then
echo "run_benchmarks needs exactly 1 argument - The output path to store the benchmark results"
exit -1
echo "run_benchmarks needs exactly 2 arguments: "
echo " 1. Path to a .txt file containing the list of benchmark config paths"
echo " 2. The output path to store the benchmark results"
exit 1
fi

output_directory=$1

touch $ouptut_directory/bench_test_1.txt
touch $ouptut_directory/bench_test_2.txt
benchmark_config_list_file=$1
output_directory=$2

for bench_config in `cat $benchmark_config_list_file`
do
echo "Running benchmarks for config " $bench_config
python3 -m neuralmagic.benchmarks.run_benchmarks -i $bench_config -o $output_directory
done
16 changes: 12 additions & 4 deletions neuralmagic/benchmarks/common.py
@@ -4,18 +4,26 @@
from argparse import Namespace
from pathlib import Path
from typing import NamedTuple, Iterable
# from neuralmagic.tools.call_cmd import call_cmd

from vllm.model_executor.weight_utils import prepare_hf_model_weights
from vllm.transformers_utils.tokenizer import get_tokenizer
from vllm.transformers_utils.config import get_config
# TODO (varun) : find a workaround so we avoid using private methods
from vllm.config import _get_and_verify_max_len


def download_model(hf_model_id: str) -> None:
def download_model(model: str) -> None:
"""
Downloads a hugging face model to cache
"""
prepare_hf_model_weights(hf_model_id)
get_tokenizer(hf_model_id)
prepare_hf_model_weights(model)
get_tokenizer(model)


def max_model_length_from_model_id(model: str,
trust_remote_code: bool = False) -> int:
config = get_config(model, trust_remote_code=trust_remote_code)
return _get_and_verify_max_len(config, max_model_len=None)


def script_args_to_cla(config: NamedTuple) -> Iterable[list[str]]:
6 changes: 6 additions & 0 deletions neuralmagic/benchmarks/configs/benchmark_serving.json
@@ -8,6 +8,9 @@
"mistralai/Mistral-7B-Instruct-v0.2",
"NousResearch/Llama-2-7b-chat-hf"
],
"max_model_lens": [
4096
],
"sparsity": [],
"script_name": "benchmark_serving",
"script_args": {
@@ -33,6 +36,9 @@
"mistralai/Mistral-7B-Instruct-v0.2",
"NousResearch/Llama-2-7b-chat-hf"
],
"max_model_lens": [
4096
],
"sparsity": [],
"script_name": "benchmark_serving",
"script_args": {
3 changes: 3 additions & 0 deletions neuralmagic/benchmarks/configs/benchmark_throughput.json
@@ -8,6 +8,7 @@
"mistralai/Mistral-7B-Instruct-v0.2",
"NousResearch/Llama-2-7b-chat-hf"
],
"max_model_lens" : [4096],
"script_name": "benchmark_throughput",
"script_args": {
"backend": [
@@ -44,6 +45,7 @@
"mistralai/Mistral-7B-Instruct-v0.2",
"NousResearch/Llama-2-7b-chat-hf"
],
"max_model_lens" : [4096],
"script_name": "benchmark_throughput",
"script_args": {
"backend": [
@@ -87,6 +89,7 @@
"mistralai/Mistral-7B-Instruct-v0.2",
"NousResearch/Llama-2-7b-chat-hf"
],
"max_model_lens" : [4096],
"script_name": "benchmark_throughput",
"script_args": {
"backend": [
1 change: 1 addition & 0 deletions neuralmagic/benchmarks/requirements-benchmark.txt
@@ -1,3 +1,4 @@
# requirements specific to the vllm neuralmagic fork
requests
aiohttp
datasets
26 changes: 21 additions & 5 deletions neuralmagic/benchmarks/run_benchmark_serving.py
@@ -2,19 +2,20 @@
import subprocess
import requests
import time
import itertools

from typing import NamedTuple, Optional
from pathlib import Path

from neuralmagic.tools.call_cmd import call_cmd
from neuralmagic.benchmarks.common import download_model, script_args_to_cla, benchmark_configs
from neuralmagic.benchmarks.common import download_model, max_model_length_from_model_id, script_args_to_cla, benchmark_configs
from neuralmagic.benchmarks.scripts.common import warmup_server

BENCH_SERVER_HOST = "localhost"
BENCH_SERVER_PORT = 9000


def is_server_running(host: str, port: int, timeout=60) -> bool:
def is_server_running(host: str, port: int, timeout=300) -> bool:

def try_connection() -> bool:
try:
@@ -64,12 +65,27 @@ def run_bench(server_cmd: str, bench_cmd: list[str], model: str) -> None:
script_path = f"neuralmagic.benchmarks.scripts.{config.script_name}"

sparsities = [None] if len(config.sparsity) == 0 else config.sparsity
for model in config.models:

for model, sparsity in itertools.product(config.models, sparsities):

# download model beforehand so the server can start without any holdup
download_model(model)

for sparsity in sparsities:
server_cmd = f"python3 -m vllm.entrypoints.api_server --model {model} --tokenizer {model} --host {BENCH_SERVER_HOST} --port {BENCH_SERVER_PORT} --disable-log-requests"
supported_max_model_len = max_model_length_from_model_id(model)

# If the requested model-len is too big, try running with the maximum supported for this model.
max_model_lens = set(
map(lambda v: min(v, supported_max_model_len),
config.max_model_lens))
if (config.max_model_lens != list(max_model_lens)):
print(
f"WARNING: max_model_len modified to {max_model_lens} from {config.max_model_lens} for model {model}"
)

for max_model_len in max_model_lens:

server_cmd = f"python3 -m vllm.entrypoints.api_server --model {model} --tokenizer {model} --max-model-len {max_model_len} --host {BENCH_SERVER_HOST} --port {BENCH_SERVER_PORT} --disable-log-requests"

if sparsity:
server_cmd += f" --sparsity {sparsity} "

33 changes: 24 additions & 9 deletions neuralmagic/benchmarks/run_benchmark_throughput.py
@@ -3,7 +3,7 @@
from typing import NamedTuple, Optional

from neuralmagic.tools.call_cmd import call_cmd
from neuralmagic.benchmarks.common import script_args_to_cla, benchmark_configs
from neuralmagic.benchmarks.common import script_args_to_cla, benchmark_configs, max_model_length_from_model_id


def run_benchmark_throughput_script(config: NamedTuple,
@@ -15,16 +15,31 @@ def run_benchmark_throughput_script(config: NamedTuple,
script_path = f"neuralmagic.benchmarks.scripts.{config.script_name}"

for model in config.models:
for script_args in script_args_to_cla(config):
bench_cmd = (["python3", "-m", f"{script_path}"] + script_args +
["--model", f"{model}"] + ["--tokenizer", f"{model}"])

if output_directory:
bench_cmd = bench_cmd + [
"--save-directory", f"{output_directory}"
]
supported_max_model_len = max_model_length_from_model_id(model)

call_cmd(bench_cmd, stdout=None, stderr=None)
# If the requested model-len is too big, try running with the maximum supported for this model.
max_model_lens = set(
map(lambda v: min(v, supported_max_model_len),
config.max_model_lens))
if (config.max_model_lens != list(max_model_lens)):
print(
f"WARNING: max_model_len modified to {max_model_lens} from {config.max_model_lens} for model {model}"
)

for max_model_len in max_model_lens:
for script_args in script_args_to_cla(config):
bench_cmd = (["python3", "-m", f"{script_path}"] +
script_args + ["--model", f"{model}"] +
["--tokenizer", f"{model}"] +
["--max-model-len", f"{max_model_len}"])

if output_directory:
bench_cmd = bench_cmd + [
"--save-directory", f"{output_directory}"
]

call_cmd(bench_cmd, stdout=None, stderr=None)


if __name__ == '__main__':
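
For illustration, one bench_cmd the loop above would assemble, using the first model and the 4096 max_model_lens entry from benchmark_throughput.json; the flags produced by script_args_to_cla are omitted for brevity, so this is a sketch rather than a complete command.

  # sketch of a generated bench_cmd (script_args flags elided)
  python3 -m neuralmagic.benchmarks.scripts.benchmark_throughput \
      --model mistralai/Mistral-7B-Instruct-v0.2 \
      --tokenizer mistralai/Mistral-7B-Instruct-v0.2 \
      --max-model-len 4096 \
      --save-directory benchmark-results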
