File tree Expand file tree Collapse file tree 2 files changed +8
-3
lines changed Expand file tree Collapse file tree 2 files changed +8
-3
lines changed Original file line number Diff line number Diff line change @@ -355,6 +355,9 @@ class ParallelConfig:
355
355
worker_use_ray: Whether to use Ray for model workers. Will be set to
356
356
True if either pipeline_parallel_size or tensor_parallel_size is
357
357
greater than 1.
358
+ max_parallel_loading_workers: Maximum number of parallel batches
359
+ when loading the model sequentially. To avoid RAM OOM when using tensor
360
+ parallelism and large models.
358
361
disable_custom_all_reduce: Disable the custom all-reduce kernel and
359
362
fall back to NCCL.
360
363
"""
@@ -470,7 +473,7 @@ def __post_init__(self):
470
473
elif self .max_cpu_loras < self .max_loras :
471
474
raise ValueError (
472
475
f"max_cpu_loras ({ self .max_cpu_loras } ) must be >= "
473
- f"max_num_seqs ({ self .max_loras } )" )
476
+ f"max_loras ({ self .max_loras } )" )
474
477
475
478
def verify_with_model_config (self , model_config : ModelConfig ):
476
479
if self .lora_dtype in (None , "auto" ):
Original file line number Diff line number Diff line change @@ -299,6 +299,8 @@ class AsyncLLMEngine:
299
299
async frontend will be executed in a separate process as the
300
300
model workers.
301
301
log_requests: Whether to log the requests.
302
+ max_log_len: Maximum number of prompt characters or prompt token IDs
303
+ being printed in the log.
302
304
start_engine_loop: If True, the background task to run the engine
303
305
will be automatically started in the generate call.
304
306
*args: Arguments for LLMEngine.
@@ -434,8 +436,8 @@ async def add_request(
434
436
logger .info (f"Received request { request_id } : "
435
437
f"prompt: { shortened_prompt !r} , "
436
438
f"prefix_pos: { prefix_pos } ,"
437
- f"sampling params : { sampling_params } , "
438
- f"prompt token ids : { shortened_token_ids } , "
439
+ f"sampling_params : { sampling_params } , "
440
+ f"prompt_token_ids : { shortened_token_ids } , "
439
441
f"lora_request: { lora_request } ." )
440
442
441
443
if not self .is_running :
You can’t perform that action at this time.
0 commit comments