lines changed Original file line number Diff line number Diff line change @@ -153,13 +153,14 @@ from vllm import LLM
 llm = LLM(
     model="Qwen/Qwen2.5-VL-72B-Instruct",
-    # Create two EngineCore instances, one per DP rank
-    data_parallel_size=2,
-    # Within each EngineCore instance:
-    # The vision encoder uses TP=4 (not DP=2) to shard the input data
-    # The language decoder uses TP=4 to shard the weights as usual
     tensor_parallel_size=4,
+    # When mm_encoder_tp_mode="data",
+    # the vision encoder uses TP=4 (not DP=1) to shard the input data,
+    # so the TP size becomes the effective DP size.
+    # Note that this is independent of the DP size of the language decoder, which is used in expert-parallel settings.
     mm_encoder_tp_mode="data",
+    # The language decoder uses TP=4 to shard the weights regardless
+    # of the mm_encoder_tp_mode setting.
 )
 ```
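For context, a minimal usage sketch of the engine configured above. This is not part of the diff: the chat-style message format, the placeholder image URL, and the sampling parameters are illustrative assumptions about how one might run multimodal inference with this configuration.

```python
# Illustrative sketch (not part of this diff): multimodal inference with the
# engine configured as in the docs example above. Image URL and sampling
# parameters are placeholders.
from vllm import LLM, SamplingParams

llm = LLM(
    model="Qwen/Qwen2.5-VL-72B-Instruct",
    tensor_parallel_size=4,
    # Shard the vision encoder's *inputs* across the 4 TP ranks instead of
    # sharding its weights; the language decoder still uses TP=4 for weights.
    mm_encoder_tp_mode="data",
)

# llm.chat() applies the model's chat template, so the image can be passed
# as an OpenAI-style content part.
messages = [{
    "role": "user",
    "content": [
        {"type": "image_url",
         "image_url": {"url": "https://example.com/sample.jpg"}},
        {"type": "text", "text": "Describe this image."},
    ],
}]

outputs = llm.chat(messages, SamplingParams(max_tokens=128))
print(outputs[0].outputs[0].text)
```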