Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CI/Build] AMD CI pipeline with extended set of tests. #4267

Merged
merged 37 commits into from
May 2, 2024
Merged
Show file tree
Hide file tree
Changes from 34 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
5ca5b12
AMD CI pipeline with extended set of tests.
Alexei-V-Ivanov-AMD Apr 22, 2024
c43169f
Correction.
Alexei-V-Ivanov-AMD Apr 22, 2024
49206a5
Another correction.
Alexei-V-Ivanov-AMD Apr 22, 2024
9becdfb
Correction ++
Alexei-V-Ivanov-AMD Apr 22, 2024
7b53280
New build.
Alexei-V-Ivanov-AMD Apr 22, 2024
d45036b
Merge branch 'vllm-project:main' into extended_amdci
Alexei-V-Ivanov-AMD Apr 22, 2024
4f1a36b
Merge branch 'vllm-project:main' into extended_amdci
Alexei-V-Ivanov-AMD Apr 25, 2024
ac196f9
Update run-amd-test_distributed.sh
Alexei-V-Ivanov-AMD Apr 25, 2024
ff2f28f
Update Dockerfile.rocm
Alexei-V-Ivanov-AMD Apr 25, 2024
e4b52c4
Update run-amd-test_speculative_decoding.sh
Alexei-V-Ivanov-AMD Apr 25, 2024
ad960c7
Removing unnecessary replication of amd-test files.
Alexei-V-Ivanov-AMD Apr 29, 2024
0db9072
Fix.
Alexei-V-Ivanov-AMD Apr 29, 2024
bfcd53f
Removing redundant amd-test scripts.
Alexei-V-Ivanov-AMD Apr 29, 2024
f6386fa
Fixing syntax.
Alexei-V-Ivanov-AMD Apr 29, 2024
ba3b398
Adding more tests.
Alexei-V-Ivanov-AMD Apr 29, 2024
b43fbc5
Registering new tests.
Alexei-V-Ivanov-AMD Apr 29, 2024
7316136
Minor fixes.
Alexei-V-Ivanov-AMD Apr 29, 2024
2845480
Merge branch 'extended_amdci_v2' into extended_amdci
Alexei-V-Ivanov-AMD Apr 29, 2024
d60640b
use templating to reduce complexity
simon-mo Apr 30, 2024
29d836a
endif
simon-mo Apr 30, 2024
c32e5ce
fix syntax
simon-mo Apr 30, 2024
d5f0d9f
run hardware build faster
simon-mo Apr 30, 2024
df1d3d7
setup group ignore depends
simon-mo Apr 30, 2024
5f6b49a
add back container cleanup
simon-mo Apr 30, 2024
781c172
Merge branch 'vllm-project:main' into extended_amdci
Alexei-V-Ivanov-AMD May 1, 2024
295bffe
Update Dockerfile.rocm
Alexei-V-Ivanov-AMD May 1, 2024
8dbbf00
Update .buildkite/test-template.j2
Alexei-V-Ivanov-AMD May 1, 2024
e48b098
Merge branch 'vllm-project:main' into extended_amdci
Alexei-V-Ivanov-AMD May 1, 2024
420e395
Update run-amd-test.sh
Alexei-V-Ivanov-AMD May 1, 2024
7433522
Update run-amd-test.sh
Alexei-V-Ivanov-AMD May 1, 2024
bc77233
Update run-amd-test.sh
Alexei-V-Ivanov-AMD May 1, 2024
5f6b434
Update run-amd-test.sh
Alexei-V-Ivanov-AMD May 1, 2024
0d8deac
Update run-amd-test.sh
Alexei-V-Ivanov-AMD May 1, 2024
b2bd969
Reverting quotation back to the original state in run-amd-test.sh
Alexei-V-Ivanov-AMD May 1, 2024
0800063
Update run-amd-test.sh
Alexei-V-Ivanov-AMD May 1, 2024
2b58db6
Merge branch 'main' into extended_amdci
Alexei-V-Ivanov-AMD May 2, 2024
7a98927
Merge branch 'vllm-project:main' into extended_amdci
Alexei-V-Ivanov-AMD May 2, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 25 additions & 33 deletions .buildkite/run-amd-test.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
# This script builds the ROCm docker image and runs the API server inside the container.
# It serves as a sanity check for compilation and basic model usage.
# This script builds the ROCm docker image and runs tests inside it.
set -ex

# Print ROCm version
echo "--- ROCm info"
rocminfo

echo "--- Resetting GPUs"

echo "reset" > /opt/amdgpu/etc/gpu_state

Expand All @@ -16,37 +17,28 @@ while true; do
fi
done

echo "--- Building container"
sha=$(git rev-parse --short HEAD)
container_name=rocm_${sha}
docker build \
-t ${container_name} \
-f Dockerfile.rocm \
--progress plain \
.

# Best-effort cleanup of the docker artifacts named by the global
# ${container_name}.
# Force-removes the container of that name; only if that command fails does it
# fall back to force-removing the image of the same name. Failures of both
# commands are deliberately ignored, so this function always returns 0.
# Globals: container_name (read)
remove_docker_container() {
  # Quote the name so it always reaches docker as exactly one argument.
  docker rm -f "${container_name}" || docker image rm -f "${container_name}" || true
}
trap remove_docker_container EXIT

echo "--- Running container"

# Try building the docker image
docker build -t rocm -f Dockerfile.rocm .

# Setup cleanup
# Force-remove the container named "rocm". A failing `docker rm` is ignored,
# so this function always returns 0.
remove_docker_container() {
  if ! docker rm -f rocm; then
    return 0
  fi
}
trap remove_docker_container EXIT
remove_docker_container

# Run the image
export HIP_VISIBLE_DEVICES=1
docker run --device /dev/kfd --device /dev/dri --network host -e HIP_VISIBLE_DEVICES --name rocm rocm python3 -m vllm.entrypoints.api_server &

# Wait for the server to start
# Poll localhost:8000/health once per second until it answers HTTP 200.
# Arguments: $1 - optional maximum number of polling attempts, i.e. roughly
#                 the number of seconds to wait (default: 300)
# Outputs:   prints "Timeout after N seconds" when the limit is reached
# Returns:   0 once the endpoint answers 200, 1 if the wait times out
wait_for_server_to_start() {
  local timeout=${1:-300}
  local counter=0

  while [ "$(curl -s -o /dev/null -w '%{http_code}' localhost:8000/health)" != "200" ]; do
    sleep 1
    counter=$((counter + 1))
    if [ "$counter" -ge "$timeout" ]; then
      echo "Timeout after $timeout seconds"
      # Report the failure instead of falling through as if the server were
      # up, so the caller does not go on to query a server that never started.
      return 1
    fi
  done
}
wait_for_server_to_start
docker run \
--device /dev/kfd --device /dev/dri \
--network host \
--rm \
-e HF_TOKEN \
--name ${container_name} \
${container_name} \
/bin/bash -c "$1"

# Test a simple prompt
curl -X POST -H "Content-Type: application/json" \
localhost:8000/generate \
-d '{"prompt": "San Francisco is a"}'
5 changes: 5 additions & 0 deletions .buildkite/run-benchmarks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ echo '```' >> benchmark_results.md
tail -n 20 benchmark_serving.txt >> benchmark_results.md # last 20 lines
echo '```' >> benchmark_results.md

# if the agent binary is not found, skip uploading the results, exit 0
if [ ! -f /workspace/buildkite-agent ]; then
exit 0
fi

# upload the results to buildkite
/workspace/buildkite-agent annotate --style "info" --context "benchmark-results" < benchmark_results.md

Expand Down
11 changes: 11 additions & 0 deletions .buildkite/test-pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ steps:
- VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py

- label: Core Test
mirror_hardwares: [amd]
command: pytest -v -s core

- label: Distributed Comm Ops Test
Expand All @@ -29,6 +30,7 @@ steps:
- label: Distributed Tests
working_dir: "/vllm-workspace/tests/distributed"
num_gpus: 2 # only support 1 or 2 for now.
mirror_hardwares: [amd]
commands:
- pytest -v -s test_pynccl.py
- pytest -v -s test_pynccl_library.py
Expand All @@ -38,6 +40,7 @@ steps:
- TEST_DIST_MODEL=meta-llama/Llama-2-7b-hf pytest -v -s test_chunked_prefill_distributed.py

- label: Engine Test
mirror_hardwares: [amd]
command: pytest -v -s engine tokenization test_sequence.py test_config.py test_logger.py

- label: Entrypoints Test
Expand All @@ -48,6 +51,7 @@ steps:

- label: Examples Test
working_dir: "/vllm-workspace/examples"
mirror_hardwares: [amd]
commands:
# install aws cli for llava_example.py
- pip install awscli
Expand All @@ -61,29 +65,35 @@ steps:
parallelism: 4

- label: Models Test
mirror_hardwares: [amd]
commands:
- bash ../.buildkite/download-images.sh
- pytest -v -s models --ignore=models/test_llava.py --ignore=models/test_mistral.py

- label: Llava Test
mirror_hardwares: [amd]
commands:
- bash ../.buildkite/download-images.sh
- pytest -v -s models/test_llava.py

- label: Prefix Caching Test
mirror_hardwares: [amd]
commands:
- pytest -v -s prefix_caching

- label: Samplers Test
command: pytest -v -s samplers

- label: LogitsProcessor Test
mirror_hardwares: [amd]
command: pytest -v -s test_logits_processor.py

- label: Worker Test
mirror_hardwares: [amd]
command: pytest -v -s worker

- label: Speculative decoding tests
mirror_hardwares: [amd]
command: pytest -v -s spec_decode

- label: LoRA Test %N
Expand All @@ -101,6 +111,7 @@ steps:

- label: Benchmarks
working_dir: "/vllm-workspace/.buildkite"
mirror_hardwares: [amd]
commands:
- pip install aiohttp
- bash run-benchmarks.sh
Expand Down
21 changes: 16 additions & 5 deletions .buildkite/test-template.j2
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,29 @@ steps:
limit: 5
- wait

- label: "AMD Test"
agents:
queue: amd
command: bash .buildkite/run-amd-test.sh
- group: "AMD Tests"
depends_on: ~
steps:
{% for step in steps %}
{% if step.mirror_hardwares and "amd" in step.mirror_hardwares %}
- label: "AMD: {{ step.label }}"
agents:
queue: amd
command: bash .buildkite/run-amd-test.sh "'cd {{ (step.working_dir or default_working_dir) | safe }} && {{ step.command or (step.commands | join(' && ')) | safe }}'"
env:
DOCKER_BUILDKIT: "1"
{% endif %}
{% endfor %}

- label: "Neuron Test"
depends_on: ~
agents:
queue: neuron
command: bash .buildkite/run-neuron-test.sh
soft_fail: true

- label: "CPU Test"
- label: "Intel Test"
depends_on: ~
command: bash .buildkite/run-cpu-test.sh

{% for step in steps %}
Expand Down
13 changes: 7 additions & 6 deletions Dockerfile.rocm
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ RUN apt-get update && apt-get install -y \

### Mount Point ###
# When launching the container, mount the code directory to the APP_MOUNT path set below
ARG APP_MOUNT=/app
ARG APP_MOUNT=/vllm-workspace
VOLUME [ ${APP_MOUNT} ]
WORKDIR ${APP_MOUNT}

Expand Down Expand Up @@ -89,15 +89,16 @@ RUN if [ "$BUILD_TRITON" = "1" ]; then \
&& cd ../..; \
fi

COPY ./ /app/vllm
WORKDIR /vllm-workspace
COPY . .

RUN python3 -m pip install --upgrade pip numba

RUN cd /app \
&& cd vllm \
&& pip install -U -r requirements-rocm.txt \
&& patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h /app/vllm/rocm_patch/rocm_bf16.patch \
RUN --mount=type=cache,target=/root/.cache/pip \
pip install -U -r requirements-rocm.txt \
&& patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h ./rocm_patch/rocm_bf16.patch \
&& python3 setup.py install \
&& cp build/lib.linux-x86_64-cpython-39/vllm/_C.cpython-39-x86_64-linux-gnu.so vllm/ \
&& cd ..

RUN python3 -m pip install --upgrade pip
Expand Down
Loading