Test Queues #456

Draft · wants to merge 9 commits into main
19 changes: 19 additions & 0 deletions .buildkite/test-pipeline.yaml
@@ -92,7 +92,9 @@ steps:
- VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py

- label: Core Test # 10min
working_dir: "/vllm-workspace/tests"
mirror_hardwares: [amd]
amd_gpus: 4 # Just for the sake of queue testing
fast_check: true
source_file_dependencies:
- vllm/core
@@ -105,6 +107,7 @@ steps:
working_dir: "/vllm-workspace/tests"
fast_check: true
mirror_hardwares: [amd]
amd_gpus: 1 # Just for the sake of queue testing
source_file_dependencies:
- vllm/
commands:
@@ -158,6 +161,7 @@ steps:

- label: Regression Test # 5min
mirror_hardwares: [amd]
amd_gpus: 1
source_file_dependencies:
- vllm/
- tests/test_regression
@@ -168,6 +172,7 @@ steps:

- label: Engine Test # 10min
mirror_hardwares: [amd]
amd_gpus: 1
source_file_dependencies:
- vllm/
- tests/engine
@@ -176,6 +181,7 @@ steps:
- pytest -v -s engine test_sequence.py test_config.py test_logger.py
# OOM in the CI unless we run this separately
- pytest -v -s tokenization
working_dir: "/vllm-workspace/tests" # optional

- label: V1 Test
#mirror_hardwares: [amd]
@@ -217,7 +223,9 @@ steps:
- python3 offline_inference/profiling.py --model facebook/opt-125m run_num_steps --num-steps 2

- label: Prefix Caching Test # 9min
working_dir: "/vllm-workspace/tests"
mirror_hardwares: [amd]
amd_gpus: 1
source_file_dependencies:
- vllm/
- tests/prefix_caching
@@ -235,7 +243,9 @@ steps:
- VLLM_USE_FLASHINFER_SAMPLER=1 pytest -v -s samplers

- label: LogitsProcessor Test # 5min
working_dir: "/vllm-workspace/tests"
mirror_hardwares: [amd]
amd_gpus: 1
source_file_dependencies:
- vllm/model_executor/layers
- vllm/model_executor/guided_decoding
@@ -256,7 +266,9 @@ steps:
- pytest -v -s spec_decode/e2e/test_eagle_correctness.py

- label: LoRA Test %N # 15min each
working_dir: "/vllm-workspace/tests"
mirror_hardwares: [amd]
amd_gpus: 8
source_file_dependencies:
- vllm/lora
- tests/lora
@@ -282,7 +294,9 @@ steps:
- pytest -v -s compile/test_full_graph.py

- label: Kernels Test %N # 1h each
working_dir: "/vllm-workspace/tests"
mirror_hardwares: [amd]
amd_gpus: 8
source_file_dependencies:
- csrc/
- vllm/attention
@@ -292,8 +306,10 @@ steps:
parallelism: 4

- label: Tensorizer Test # 11min
working_dir: "/vllm-workspace/tests"
mirror_hardwares: [amd]
soft_fail: true
amd_gpus: 1
source_file_dependencies:
- vllm/model_executor/model_loader
- tests/tensorizer_loader
@@ -305,6 +321,7 @@ steps:
- label: Benchmarks # 9min
working_dir: "/vllm-workspace/.buildkite"
mirror_hardwares: [amd]
amd_gpus: 1
source_file_dependencies:
- benchmarks/
commands:
@@ -334,8 +351,10 @@ steps:
- pytest -v -s encoder_decoder

- label: OpenAI-Compatible Tool Use # 20 min
working_dir: "/vllm-workspace/tests"
fast_check: false
mirror_hardwares: [ amd ]
amd_gpus: 1
source_file_dependencies:
- vllm/
- tests/tool_use
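
Taken together, the pipeline changes above follow one pattern: each AMD-mirrored step gains an `amd_gpus` field declaring how many GPUs its AMD run should be scheduled on. A minimal sketch of such a step is below; the label, dependency path, and command are placeholders for illustration only, not part of this PR:

```yaml
# Hypothetical step showing the shape of the new field; only mirror_hardwares
# and amd_gpus are the mechanism this PR exercises.
- label: Example Test # 10min
  working_dir: "/vllm-workspace/tests"
  mirror_hardwares: [amd]   # also mirror this step onto AMD hardware
  amd_gpus: 4               # route the AMD run to the 4-GPU queue (see test-template.j2)
  source_file_dependencies:
    - vllm/
  commands:
    - pytest -v -s example_dir
```
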
12 changes: 10 additions & 2 deletions .buildkite/test-template.j2
@@ -7,7 +7,7 @@ steps:
- label: ":docker: build image"
depends_on: ~
commands:
- "docker build --build-arg max_jobs=16 --tag {{ docker_image_amd }} -f Dockerfile.rocm --progress plain ."
- "docker build --build-arg max_jobs=16 --tag {{ docker_image_amd }} -f Dockerfile.rocm --target test --progress plain ."
- "docker push {{ docker_image_amd }}"
key: "amd-build"
env:
@@ -27,7 +27,15 @@ steps:
depends_on:
- "amd-build"
agents:
queue: amd_gpu
{% if step.amd_gpus and step.amd_gpus == 8 %}
queue: amd_gpu_8
{% elif step.amd_gpus and step.amd_gpus == 4 %}
queue: amd_gpu_4
{% elif step.amd_gpus and step.amd_gpus == 2 %}
queue: amd_gpu_4
{% else %}
queue: amd_gpu_1
{% endif %}
commands:
- bash .buildkite/run-amd-test.sh "cd {{ (step.working_dir or default_working_dir) | safe }} ; {{ step.command or (step.commands | join(" && ")) | safe }}"
env:
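
The template change above selects the Buildkite agent queue from the step's `amd_gpus` value: 8 maps to `amd_gpu_8`, 4 maps to `amd_gpu_4`, 2 also falls back to `amd_gpu_4` (no dedicated 2-GPU queue is introduced here), and everything else, including steps with no `amd_gpus` at all, lands on `amd_gpu_1`. As a rough sketch, a step declaring `amd_gpus: 8` would render to an agent block like the one below; the label text and joined command are assumptions about the template's output, not verified rendering:

```yaml
# Approximate rendered output for one mirrored step with amd_gpus: 8
# (label format and command joining are assumed, not taken from this diff).
- label: "AMD: LoRA Test"
  depends_on:
    - "amd-build"
  agents:
    queue: amd_gpu_8   # selected because step.amd_gpus == 8
  commands:
    - bash .buildkite/run-amd-test.sh "cd /vllm-workspace/tests ; <step commands joined with ' && '>"
```
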
1 change: 1 addition & 0 deletions Dockerfile.rocm
@@ -108,6 +108,7 @@ ARG COMMON_WORKDIR
# Copy over the benchmark scripts as well
COPY --from=export_vllm /benchmarks ${COMMON_WORKDIR}/vllm/benchmarks
COPY --from=export_vllm /examples ${COMMON_WORKDIR}/vllm/examples
# "Dummy alternation"

ENV RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1
ENV TOKENIZERS_PARALLELISM=false