Skip to content

Commit e839e2e

Browse files
jennyyyyzhenyZhen
authored andcommitted
[Multi Modal] Add an env var for message queue max chunk bytes (vllm-project#19242)
Signed-off-by: yZhen <yZhen@fb.com> Co-authored-by: yZhen <yZhen@fb.com> Signed-off-by: minpeter <kali2005611@gmail.com>
1 parent a3e7f13 commit e839e2e

File tree

2 files changed

+12
-1
lines changed

2 files changed

+12
-1
lines changed

vllm/envs.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@
123123
VLLM_MAX_TOKENS_PER_EXPERT_FP4_MOE: int = 163840
124124
VLLM_TOOL_PARSE_REGEX_TIMEOUT_SECONDS: int = 1
125125
VLLM_SLEEP_WHEN_IDLE: bool = False
126+
VLLM_MQ_MAX_CHUNK_BYTES_MB: int = 16
126127

127128

128129
def get_default_cache_root():
@@ -847,6 +848,12 @@ def get_vllm_port() -> Optional[int]:
847848
# latency penalty when a request eventually comes.
848849
"VLLM_SLEEP_WHEN_IDLE":
849850
lambda: bool(int(os.getenv("VLLM_SLEEP_WHEN_IDLE", "0"))),
851+
852+
# Control the max chunk bytes (in MB) for the rpc message queue.
853+
# Object larger than this threshold will be broadcast to worker
854+
# processes via zmq.
855+
"VLLM_MQ_MAX_CHUNK_BYTES_MB":
856+
lambda: int(os.getenv("VLLM_MQ_MAX_CHUNK_BYTES_MB", "16")),
850857
}
851858

852859
# --8<-- [end:env-vars-definition]

vllm/v1/executor/multiproc_executor.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import cloudpickle
2222

23+
import vllm.envs as envs
2324
from vllm.config import VllmConfig
2425
from vllm.distributed import (destroy_distributed_environment,
2526
destroy_model_parallel)
@@ -72,7 +73,10 @@ def _init_executor(self) -> None:
7273

7374
# Initialize worker and set up message queues for SchedulerOutputs
7475
# and ModelRunnerOutputs
75-
self.rpc_broadcast_mq = MessageQueue(self.world_size, self.world_size)
76+
max_chunk_bytes = envs.VLLM_MQ_MAX_CHUNK_BYTES_MB * 1024 * 1024
77+
self.rpc_broadcast_mq = MessageQueue(self.world_size,
78+
self.world_size,
79+
max_chunk_bytes=max_chunk_bytes)
7680
scheduler_output_handle = self.rpc_broadcast_mq.export_handle()
7781

7882
# Create workers

0 commit comments

Comments
 (0)