@@ -33,14 +33,13 @@ steps:
3333
3434- label : Documentation Build # 2min
3535 mirror_hardwares : [amdexperimental]
36- working_dir : "/vllm-workspace/test_docs/docs"
36+ working_dir : "/vllm-workspace/test_docs"
3737 fast_check : true
3838 no_gpu : True
3939 commands :
40- - pip install -r ../../requirements/docs.txt
41- - SPHINXOPTS="-W" make html
42- # Check API reference (if it fails, you may have missing mock imports)
43- - grep "sig sig-object py" build/html/api/vllm/vllm.sampling_params.html
40+ - pip install -r ../requirements/docs.txt
41+ # TODO: add `--strict` once warnings in docstrings are fixed
42+ - mkdocs build
4443
4544- label : Async Engine, Inputs, Utils, Worker Test # 24min
4645 mirror_hardwares : [amdexperimental]
5958 - pytest -v -s async_engine # AsyncLLMEngine
6059 - NUM_SCHEDULER_STEPS=4 pytest -v -s async_engine/test_async_llm_engine.py
6160 - pytest -v -s test_inputs.py
61+ - pytest -v -s test_outputs.py
6262 - pytest -v -s multimodal
6363 - pytest -v -s test_utils.py # Utils
6464 - pytest -v -s worker # Worker
@@ -128,7 +128,7 @@ steps:
128128 - pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process
129129 - pytest -v -s entrypoints/llm/test_generate_multiple_loras.py # it needs a clean process
130130 - VLLM_USE_V1=0 pytest -v -s entrypoints/llm/test_guided_generate.py # it needs a clean process
131- - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/test_openai_schema.py
131+ - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/
132132 - pytest -v -s entrypoints/test_chat_utils.py
133133 - VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests
134134
@@ -141,6 +141,7 @@ steps:
141141 - vllm/core/
142142 - tests/distributed/test_utils
143143 - tests/distributed/test_pynccl
144+ - tests/distributed/test_events
144145 - tests/spec_decode/e2e/test_integration_dist_tp4
145146 - tests/compile/test_basic_correctness
146147 - examples/offline_inference/rlhf.py
@@ -159,6 +160,7 @@ steps:
159160 - pytest -v -s distributed/test_utils.py
160161 - pytest -v -s compile/test_basic_correctness.py
161162 - pytest -v -s distributed/test_pynccl.py
163+ - pytest -v -s distributed/test_events.py
162164 - pytest -v -s spec_decode/e2e/test_integration_dist_tp4.py
163165 # TODO: create a dedicated test section for multi-GPU example tests
164166 # when we have multiple distributed example tests
@@ -224,6 +226,7 @@ steps:
224226 - pytest -v -s v1/test_serial_utils.py
225227 - pytest -v -s v1/test_utils.py
226228 - pytest -v -s v1/test_oracle.py
229+ - pytest -v -s v1/test_metrics_reader.py
227230 # TODO: accuracy does not match, whether setting
228231 # VLLM_USE_FLASHINFER_SAMPLER or not on H100.
229232 - pytest -v -s v1/e2e
@@ -248,7 +251,7 @@ steps:
248251 - python3 offline_inference/vision_language.py --seed 0
249252 - python3 offline_inference/vision_language_embedding.py --seed 0
250253 - python3 offline_inference/vision_language_multi_image.py --seed 0
251- - VLLM_USE_V1=0 python3 other/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 other/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
254+ - VLLM_USE_V1=0 python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
252255 - python3 offline_inference/encoder_decoder.py
253256 - python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
254257 - python3 offline_inference/basic/classify.py
@@ -320,6 +323,7 @@ steps:
320323 - pytest -v -s compile/test_fusion.py
321324 - pytest -v -s compile/test_silu_mul_quant_fusion.py
322325 - pytest -v -s compile/test_sequence_parallelism.py
326+ - pytest -v -s compile/test_async_tp.py
323327
324328- label : PyTorch Fullgraph Smoke Test # 9min
325329 mirror_hardwares : [amdexperimental, amdproduction]
@@ -397,10 +401,12 @@ steps:
397401 source_file_dependencies :
398402 - vllm/model_executor/model_loader
399403 - tests/tensorizer_loader
404+ - tests/entrypoints/openai/test_tensorizer_entrypoint.py
400405 commands :
401406 - apt-get update && apt-get install -y curl libsodium23
402407 - export VLLM_WORKER_MULTIPROC_METHOD=spawn
403408 - pytest -v -s tensorizer_loader
409+ - pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py
404410
405411- label : Benchmarks # 9min
406412 mirror_hardwares : [amdexperimental, amdproduction]
@@ -479,10 +485,7 @@ steps:
479485 - pytest -v -s models/test_registry.py
480486 - pytest -v -s models/test_utils.py
481487 - pytest -v -s models/test_vision.py
482- # V1 Test: https://github.com/vllm-project/vllm/issues/14531
483- - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'not llama4 and not plamo2'
484- - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'llama4'
485- - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'plamo2'
488+ - pytest -v -s models/test_initialization.py
486489
487490- label : Language Models Test (Standard)
488491 mirror_hardwares : [amdexperimental]
@@ -496,16 +499,25 @@ steps:
496499 - pip freeze | grep -E 'torch'
497500 - pytest -v -s models/language -m core_model
498501
499- - label : Language Models Test (Extended)
502+ - label : Language Models Test (Extended Generation) # 1hr20min
500503 mirror_hardwares : [amdexperimental]
501504 optional : true
502505 source_file_dependencies :
503506 - vllm/
504- - tests/models/language
507+ - tests/models/language/generation
505508 commands :
506509 # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile.
507510 - pip install 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8'
508- - pytest -v -s models/language -m 'not core_model'
511+ - pytest -v -s models/language/generation -m 'not core_model'
512+
513+ - label : Language Models Test (Extended Pooling) # 36min
514+ mirror_hardwares : [amdexperimental]
515+ optional : true
516+ source_file_dependencies :
517+ - vllm/
518+ - tests/models/language/pooling
519+ commands :
520+ - pytest -v -s models/language/pooling -m 'not core_model'
509521
510522- label : Multi-Modal Models Test (Standard)
511523 mirror_hardwares : [amdexperimental]
0 commit comments