Skip to content

Commit b575d2d

Browse files
zspo (authored) and NikolaBorisov (committed)
fix some bugs (vllm-project#2689)
1 parent c6f2467 commit b575d2d

File tree

2 files changed

+8
-3
lines changed

2 files changed

+8
-3
lines changed

vllm/config.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,9 @@ class ParallelConfig:
355355
worker_use_ray: Whether to use Ray for model workers. Will be set to
356356
True if either pipeline_parallel_size or tensor_parallel_size is
357357
greater than 1.
358+
max_parallel_loading_workers: Maximum number of multiple batches
359+
when load model sequentially. To avoid RAM OOM when using tensor
360+
parallel and large models.
358361
disable_custom_all_reduce: Disable the custom all-reduce kernel and
359362
fall back to NCCL.
360363
"""
@@ -470,7 +473,7 @@ def __post_init__(self):
470473
elif self.max_cpu_loras < self.max_loras:
471474
raise ValueError(
472475
f"max_cpu_loras ({self.max_cpu_loras}) must be >= "
473-
f"max_num_seqs ({self.max_loras})")
476+
f"max_loras ({self.max_loras})")
474477

475478
def verify_with_model_config(self, model_config: ModelConfig):
476479
if self.lora_dtype in (None, "auto"):

vllm/engine/async_llm_engine.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,8 @@ class AsyncLLMEngine:
299299
async frontend will be executed in a separate process as the
300300
model workers.
301301
log_requests: Whether to log the requests.
302+
max_log_len: Maximum number of prompt characters or prompt ID numbers
303+
being printed in log.
302304
start_engine_loop: If True, the background task to run the engine
303305
will be automatically started in the generate call.
304306
*args: Arguments for LLMEngine.
@@ -434,8 +436,8 @@ async def add_request(
434436
logger.info(f"Received request {request_id}: "
435437
f"prompt: {shortened_prompt!r}, "
436438
f"prefix_pos: {prefix_pos},"
437-
f"sampling params: {sampling_params}, "
438-
f"prompt token ids: {shortened_token_ids}, "
439+
f"sampling_params: {sampling_params}, "
440+
f"prompt_token_ids: {shortened_token_ids}, "
439441
f"lora_request: {lora_request}.")
440442

441443
if not self.is_running:

0 commit comments

Comments
 (0)