@@ -33,14 +33,13 @@ steps:
 
 - label: Documentation Build # 2min
   mirror_hardwares: [amdexperimental]
-  working_dir: "/vllm-workspace/test_docs/docs"
+  working_dir: "/vllm-workspace/test_docs"
   fast_check: true
   no_gpu: True
   commands:
-  - pip install -r ../../requirements/docs.txt
-  - SPHINXOPTS=\"-W\" make html
-  # Check API reference (if it fails, you may have missing mock imports)
-  - grep \"sig sig-object py\" build/html/api/vllm/vllm.sampling_params.html
+  - pip install -r ../requirements/docs.txt
+  # TODO: add `--strict` once warnings in docstrings are fixed
+  - mkdocs build
 
 - label: Async Engine, Inputs, Utils, Worker Test # 24min
   mirror_hardwares: [amdexperimental]
@@ -59,6 +58,7 @@ steps:
   - pytest -v -s async_engine # AsyncLLMEngine
   - NUM_SCHEDULER_STEPS=4 pytest -v -s async_engine/test_async_llm_engine.py
   - pytest -v -s test_inputs.py
+  - pytest -v -s test_outputs.py
   - pytest -v -s multimodal
   - pytest -v -s test_utils.py # Utils
   - pytest -v -s worker # Worker
@@ -128,7 +128,7 @@ steps:
   - pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process
   - pytest -v -s entrypoints/llm/test_generate_multiple_loras.py # it needs a clean process
   - VLLM_USE_V1=0 pytest -v -s entrypoints/llm/test_guided_generate.py # it needs a clean process
-  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/test_openai_schema.py
+  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/
   - pytest -v -s entrypoints/test_chat_utils.py
   - VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests
 
@@ -141,6 +141,7 @@ steps:
   - vllm/core/
   - tests/distributed/test_utils
   - tests/distributed/test_pynccl
+  - tests/distributed/test_events
   - tests/spec_decode/e2e/test_integration_dist_tp4
   - tests/compile/test_basic_correctness
   - examples/offline_inference/rlhf.py
@@ -159,6 +160,7 @@ steps:
   - pytest -v -s distributed/test_utils.py
   - pytest -v -s compile/test_basic_correctness.py
   - pytest -v -s distributed/test_pynccl.py
+  - pytest -v -s distributed/test_events.py
   - pytest -v -s spec_decode/e2e/test_integration_dist_tp4.py
   # TODO: create a dedicated test section for multi-GPU example tests
   # when we have multiple distributed example tests
@@ -224,6 +226,7 @@ steps:
   - pytest -v -s v1/test_serial_utils.py
   - pytest -v -s v1/test_utils.py
   - pytest -v -s v1/test_oracle.py
+  - pytest -v -s v1/test_metrics_reader.py
   # TODO: accuracy does not match, whether setting
   # VLLM_USE_FLASHINFER_SAMPLER or not on H100.
   - pytest -v -s v1/e2e
@@ -248,7 +251,7 @@ steps:
   - python3 offline_inference/vision_language.py --seed 0
   - python3 offline_inference/vision_language_embedding.py --seed 0
   - python3 offline_inference/vision_language_multi_image.py --seed 0
-  - VLLM_USE_V1=0 python3 other/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 other/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
+  - VLLM_USE_V1=0 python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
   - python3 offline_inference/encoder_decoder.py
   - python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
   - python3 offline_inference/basic/classify.py
@@ -320,6 +323,7 @@ steps:
   - pytest -v -s compile/test_fusion.py
   - pytest -v -s compile/test_silu_mul_quant_fusion.py
   - pytest -v -s compile/test_sequence_parallelism.py
+  - pytest -v -s compile/test_async_tp.py
 
 - label: PyTorch Fullgraph Smoke Test # 9min
   mirror_hardwares: [amdexperimental, amdproduction]
@@ -397,10 +401,12 @@ steps:
   source_file_dependencies:
   - vllm/model_executor/model_loader
   - tests/tensorizer_loader
+  - tests/entrypoints/openai/test_tensorizer_entrypoint.py
   commands:
   - apt-get update && apt-get install -y curl libsodium23
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
   - pytest -v -s tensorizer_loader
+  - pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py
 
 - label: Benchmarks # 9min
   mirror_hardwares: [amdexperimental, amdproduction]
@@ -479,10 +485,7 @@ steps:
   - pytest -v -s models/test_registry.py
   - pytest -v -s models/test_utils.py
   - pytest -v -s models/test_vision.py
-  # V1 Test: https://github.com/vllm-project/vllm/issues/14531
-  - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'not llama4 and not plamo2'
-  - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'llama4'
-  - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'plamo2'
+  - pytest -v -s models/test_initialization.py
 
 - label: Language Models Test (Standard)
   mirror_hardwares: [amdexperimental]
@@ -496,16 +499,25 @@ steps:
   - pip freeze | grep -E 'torch'
   - pytest -v -s models/language -m core_model
 
-- label: Language Models Test (Extended)
+- label: Language Models Test (Extended Generation) # 1hr20min
   mirror_hardwares: [amdexperimental]
   optional: true
   source_file_dependencies:
   - vllm/
-  - tests/models/language
+  - tests/models/language/generation
   commands:
   # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile.
   - pip install 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8'
-  - pytest -v -s models/language -m 'not core_model'
+  - pytest -v -s models/language/generation -m 'not core_model'
+
+- label: Language Models Test (Extended Pooling) # 36min
+  mirror_hardwares: [amdexperimental]
+  optional: true
+  source_file_dependencies:
+  - vllm/
+  - tests/models/language/pooling
+  commands:
+  - pytest -v -s models/language/pooling -m 'not core_model'
 
 - label: Multi-Modal Models Test (Standard)
   mirror_hardwares: [amdexperimental]
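
For readers skimming this diff, every step touched above follows the same general schema. The sketch below is a hypothetical entry assembled only from fields that already appear in this pipeline; the label, suite path, and timing comment are illustrative placeholders, not part of this change:

- label: Example Suite Test # placeholder duration
  mirror_hardwares: [amdexperimental]
  optional: true
  source_file_dependencies:   # changed paths that select this step
  - vllm/
  - tests/example_suite       # hypothetical test directory
  commands:                   # shell commands run for the step
  - pytest -v -s example_suite -m 'not core_model'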