Skip to content

Commit 4e5affe

Browse files
authored
[CI] Add Decode Context Parallelism (DCP) test to CI (#24487)
Signed-off-by: Ming Yang <minos.future@gmail.com>
1 parent e4f0b4c commit 4e5affe

File tree

2 files changed

+21
-7
lines changed

2 files changed

+21
-7
lines changed

.buildkite/test-pipeline.yaml

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -946,7 +946,6 @@ steps:
946946
commands:
947947
- pytest -v -s distributed/test_pp_cudagraph.py
948948
- pytest -v -s distributed/test_pipeline_parallel.py
949-
# - pytest -v -s distributed/test_context_parallel.py # TODO: enable it on Hopper runners or add triton MLA support
950949

951950
- label: LoRA TP Test (Distributed) # 17 min
952951
timeout_in_minutes: 30
@@ -1020,9 +1019,21 @@ steps:
10201019
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
10211020
- pytest -s -v test_lm_eval_correctness.py --config-list-file=configs/models-large.txt --tp-size=4
10221021

1023-
- label: Qwen MoE EP Test # optional
1022+
##### H200 test #####
1023+
- label: Distributed Tests (H200) # optional
10241024
gpu: h200
10251025
optional: true
1026+
working_dir: "/vllm-workspace/"
1027+
num_gpus: 2
1028+
commands:
1029+
- pytest -v -s tests/distributed/test_context_parallel.py
1030+
- CUDA_VISIBLE_DEVICES=1,2 VLLM_ALL2ALL_BACKEND=deepep_high_throughput VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 examples/offline_inference/data_parallel.py --model Qwen/Qwen1.5-MoE-A2.7B --tp-size=1 --dp-size=2 --max-model-len 2048
1031+
1032+
##### B200 test #####
1033+
- label: Distributed Tests (B200) # optional
1034+
gpu: b200
1035+
optional: true
1036+
working_dir: "/vllm-workspace/"
10261037
num_gpus: 2
10271038
commands:
1028-
- CUDA_VISIBLE_DEVICES=1,2 VLLM_ALL2ALL_BACKEND=deepep_high_throughput VLLM_USE_DEEP_GEMM=1 VLLM_LOGGING_LEVEL=DEBUG python3 /vllm-workspace/examples/offline_inference/data_parallel.py --model Qwen/Qwen1.5-MoE-A2.7B --tp-size=1 --dp-size=2 --max-model-len 2048
1039+
- pytest -v -s tests/distributed/test_context_parallel.py

tests/distributed/test_context_parallel.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,12 +71,13 @@ def detailed(
7171
parallel_setups = []
7272
for eager_mode_val in [False]:
7373
for pp_multiplier in [1]:
74-
for dcp_multiplier in [2, 4]:
74+
for dcp_multiplier in [0.5, 1]:
7575
for chunked_prefill_val in [True]:
7676
parallel_setups.append(
7777
ParallelSetup(tp_size=tp_base,
7878
pp_size=pp_multiplier * pp_base,
79-
dcp_size=dcp_multiplier * dcp_base,
79+
dcp_size=int(dcp_multiplier *
80+
tp_base),
8081
eager_mode=eager_mode_val,
8182
chunked_prefill=chunked_prefill_val))
8283
return CPTestSettings(
@@ -223,7 +224,9 @@ def _compare_cp_with_tp(
223224

224225
CP_TEXT_GENERATION_MODELS = {
225226
# [MLA attention only]
226-
"deepseek-ai/DeepSeek-V2-Lite-Chat": CPTestSettings.detailed(),
227+
"deepseek-ai/DeepSeek-V2-Lite-Chat":
228+
[CPTestSettings.detailed(),
229+
CPTestSettings.detailed(tp_base=2)],
227230
}
228231

229232
CP_TEST_MODELS = [
@@ -238,7 +241,7 @@ def _compare_cp_with_tp(
238241
"runner", "test_options"),
239242
[
240243
params for model_id, settings in CP_TEXT_GENERATION_MODELS.items()
241-
for params in settings.iter_params(model_id)
244+
for setting in settings for params in setting.iter_params(model_id)
242245
if model_id in CP_TEST_MODELS
243246
],
244247
)

0 commit comments

Comments
 (0)