Skip to content

Commit

Permalink
[CI/Build] AMD CI pipeline with extended set of tests. (vllm-project#…
Browse files Browse the repository at this point in the history
…4267)

Co-authored-by: simon-mo <simon.mo@hey.com>
  • Loading branch information
Alexei-V-Ivanov-AMD and simon-mo authored May 2, 2024
1 parent 32881f3 commit 9b5c9f9
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 45 deletions.
58 changes: 25 additions & 33 deletions .buildkite/run-amd-test.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
# This script builds the ROCm docker image and runs the API server inside the container.
# It serves as a sanity check for compilation and basic model usage.
# This script builds the ROCm docker image and runs tests inside it.
set -ex

# Print ROCm version
echo "--- ROCm info"
rocminfo

# Log-section header for the GPU reset step.
echo "--- Resetting GPUs"

# Request a GPU reset by writing "reset" into the state file.
# NOTE(review): the loop that follows is only partly visible here; presumably
# it waits for the reset to complete before the build starts -- confirm.
echo "reset" > /opt/amdgpu/etc/gpu_state

Expand All @@ -16,37 +17,28 @@ while true; do
fi
done

echo "--- Building container"
# Name the image after the short hash of the checked-out commit. The same name
# is reused later for the container and by the cleanup hook.
sha=$(git rev-parse --short HEAD)
container_name="rocm_${sha}"
# Expansions are quoted so the name is always passed as a single argument.
# --progress plain prints the full build output instead of the condensed view.
docker build \
  -t "${container_name}" \
  -f Dockerfile.rocm \
  --progress plain \
  .

#######################################
# Cleanup hook: remove the test container, falling back to removing the image.
# Globals:   container_name (read) - name shared by the image and container
# Returns:   always 0, so cleanup can never fail the script
# NOTE(review): because of the || chain the image is only removed when the
# container removal fails; confirm that leaving the image behind after a
# successful container removal is intended.
#######################################
remove_docker_container() {
  docker rm -f "${container_name}" \
    || docker image rm -f "${container_name}" \
    || true
}
trap remove_docker_container EXIT

echo "--- Running container"

# Try building the docker image
docker build -t rocm -f Dockerfile.rocm .

# Setup cleanup
# Cleanup hook: force-remove the container named "rocm". A failed removal is
# deliberately ignored so that cleanup never fails the script.
remove_docker_container() {
  docker rm -f rocm || :
}
trap remove_docker_container EXIT
remove_docker_container

# Run the image
# The container is started in the background (trailing &) so the script can go
# on to poll the server. HIP_VISIBLE_DEVICES is exported here and forwarded by
# name with -e; presumably it limits the container to GPU index 1 -- confirm.
# --network host makes the server reachable from the host on localhost.
export HIP_VISIBLE_DEVICES=1
docker run --device /dev/kfd --device /dev/dri --network host -e HIP_VISIBLE_DEVICES --name rocm rocm python3 -m vllm.entrypoints.api_server &

# Wait for the server to start
#######################################
# Poll localhost:8000/health once per second until it answers HTTP 200.
# Arguments: $1 - optional timeout in seconds (default: 300)
# Outputs:   prints "Timeout after N seconds" if the server never answers 200
# Returns:   0 once the server is healthy, 1 on timeout (so a caller running
#            under `set -e` stops here instead of continuing against a dead
#            server)
#######################################
wait_for_server_to_start() {
  local timeout=${1:-300}
  local counter=0
  local status

  while true; do
    # curl prints only the HTTP status code. A failing curl (server not up
    # yet) must not abort the script under `set -e`, hence the `|| true`.
    status=$(curl -s -o /dev/null -w '%{http_code}' localhost:8000/health) || true
    if [ "${status}" = "200" ]; then
      return 0
    fi

    sleep 1
    counter=$((counter + 1))
    if [ "${counter}" -ge "${timeout}" ]; then
      echo "Timeout after ${timeout} seconds"
      return 1
    fi
  done
}
wait_for_server_to_start
# The pipeline template passes the whole test command as a single argument
# wrapped in literal single quotes; strip one leading and one trailing quote.
test_command=${1:-}
test_command=${test_command#\'}
test_command=${test_command%\'}

# Run the test command inside the freshly built image.
# The command MUST be passed to `bash -c` as one quoted string: left unquoted
# it is word-split, bash -c receives only the first word (e.g. `cd`) as its
# command and the rest as positional parameters, and the tests never run.
# HF_TOKEN is forwarded from the host environment by name; --rm removes the
# container once the command exits.
docker run \
  --device /dev/kfd --device /dev/dri \
  --network host \
  --rm \
  -e HF_TOKEN \
  --name "${container_name}" \
  "${container_name}" \
  /bin/bash -c "${test_command}"

# Test a simple prompt
curl -X POST -H "Content-Type: application/json" \
localhost:8000/generate \
-d '{"prompt": "San Francisco is a"}'
5 changes: 5 additions & 0 deletions .buildkite/run-benchmarks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ echo '```' >> benchmark_results.md
tail -n 20 benchmark_serving.txt >> benchmark_results.md # last 20 lines
echo '```' >> benchmark_results.md

# If the Buildkite agent binary is not present at /workspace/buildkite-agent,
# skip the upload and exit successfully: a missing agent is not treated as a
# failure of the benchmark run.
if [ ! -f /workspace/buildkite-agent ]; then
exit 0
fi

# Upload the results to Buildkite as an "info" annotation. The markdown summary
# is fed on stdin, and the "benchmark-results" context identifies the annotation.
/workspace/buildkite-agent annotate --style "info" --context "benchmark-results" < benchmark_results.md

Expand Down
15 changes: 14 additions & 1 deletion .buildkite/test-pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ steps:
- VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py

- label: Core Test
mirror_hardwares: [amd]
command: pytest -v -s core

- label: Distributed Comm Ops Test
Expand All @@ -29,7 +30,10 @@ steps:

- label: Distributed Tests
working_dir: "/vllm-workspace/tests/distributed"
num_gpus: 2

num_gpus: 2 # only support 1 or 2 for now.
mirror_hardwares: [amd]

commands:
- pytest -v -s test_pynccl_library.py
- TEST_DIST_MODEL=facebook/opt-125m pytest -v -s test_basic_distributed_correctness.py
Expand All @@ -44,6 +48,7 @@ steps:
- pytest -v -s test_pynccl.py

- label: Engine Test
mirror_hardwares: [amd]
command: pytest -v -s engine tokenization test_sequence.py test_config.py test_logger.py

- label: Entrypoints Test
Expand All @@ -54,6 +59,7 @@ steps:

- label: Examples Test
working_dir: "/vllm-workspace/examples"
mirror_hardwares: [amd]
commands:
# install aws cli for llava_example.py
- pip install awscli
Expand All @@ -67,29 +73,35 @@ steps:
parallelism: 4

- label: Models Test
mirror_hardwares: [amd]
commands:
- bash ../.buildkite/download-images.sh
- pytest -v -s models --ignore=models/test_llava.py --ignore=models/test_mistral.py

- label: Llava Test
mirror_hardwares: [amd]
commands:
- bash ../.buildkite/download-images.sh
- pytest -v -s models/test_llava.py

- label: Prefix Caching Test
mirror_hardwares: [amd]
commands:
- pytest -v -s prefix_caching

- label: Samplers Test
command: pytest -v -s samplers

- label: LogitsProcessor Test
mirror_hardwares: [amd]
command: pytest -v -s test_logits_processor.py

- label: Worker Test
mirror_hardwares: [amd]
command: pytest -v -s worker

- label: Speculative decoding tests
mirror_hardwares: [amd]
command: pytest -v -s spec_decode

- label: LoRA Test %N
Expand All @@ -107,6 +119,7 @@ steps:

- label: Benchmarks
working_dir: "/vllm-workspace/.buildkite"
mirror_hardwares: [amd]
commands:
- pip install aiohttp
- bash run-benchmarks.sh
Expand Down
21 changes: 16 additions & 5 deletions .buildkite/test-template.j2
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,29 @@ steps:
limit: 5
- wait

- label: "AMD Test"
agents:
queue: amd
command: bash .buildkite/run-amd-test.sh
- group: "AMD Tests"
depends_on: ~
steps:
{% for step in steps %}
{% if step.mirror_hardwares and "amd" in step.mirror_hardwares %}
- label: "AMD: {{ step.label }}"
agents:
queue: amd
command: bash .buildkite/run-amd-test.sh "'cd {{ (step.working_dir or default_working_dir) | safe }} && {{ step.command or (step.commands | join(' && ')) | safe }}'"
env:
DOCKER_BUILDKIT: "1"
{% endif %}
{% endfor %}

- label: "Neuron Test"
depends_on: ~
agents:
queue: neuron
command: bash .buildkite/run-neuron-test.sh
soft_fail: true

- label: "CPU Test"
- label: "Intel Test"
depends_on: ~
command: bash .buildkite/run-cpu-test.sh

{% for step in steps %}
Expand Down
13 changes: 7 additions & 6 deletions Dockerfile.rocm
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ RUN apt-get update && apt-get install -y \

### Mount Point ###
# When launching the container, mount the code directory to ${APP_MOUNT}
ARG APP_MOUNT=/app
ARG APP_MOUNT=/vllm-workspace
VOLUME [ ${APP_MOUNT} ]
WORKDIR ${APP_MOUNT}

Expand Down Expand Up @@ -89,15 +89,16 @@ RUN if [ "$BUILD_TRITON" = "1" ]; then \
&& cd ../..; \
fi

COPY ./ /app/vllm
WORKDIR /vllm-workspace
COPY . .

RUN python3 -m pip install --upgrade pip numba

RUN cd /app \
&& cd vllm \
&& pip install -U -r requirements-rocm.txt \
&& patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h /app/vllm/rocm_patch/rocm_bf16.patch \
RUN --mount=type=cache,target=/root/.cache/pip \
pip install -U -r requirements-rocm.txt \
&& patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h ./rocm_patch/rocm_bf16.patch \
&& python3 setup.py install \
&& cp build/lib.linux-x86_64-cpython-39/vllm/_C.cpython-39-x86_64-linux-gnu.so vllm/ \
&& cd ..

RUN python3 -m pip install --upgrade pip
Expand Down

0 comments on commit 9b5c9f9

Please sign in to comment.