diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 7f45c3d06375a..4cbd728714bc0 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -9,8 +9,8 @@
 import vllm.envs as envs
 from vllm.config import (CacheConfig, DecodingConfig, DeviceConfig,
-                         EngineConfig, LoadConfig, LoRAConfig, ModelConfig,
-                         ObservabilityConfig, ParallelConfig,
+                         EngineConfig, LoadConfig, LoadFormat, LoRAConfig,
+                         ModelConfig, ObservabilityConfig, ParallelConfig,
                          PromptAdapterConfig, SchedulerConfig,
                          SpeculativeConfig, TokenizerPoolConfig)
 from vllm.executor.executor_base import ExecutorBase
@@ -214,10 +214,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             '--load-format',
             type=str,
             default=EngineArgs.load_format,
-            choices=[
-                'auto', 'pt', 'safetensors', 'npcache', 'dummy', 'tensorizer',
-                'bitsandbytes'
-            ],
+            choices=[f.value for f in LoadFormat],
             help='The format of the model weights to load.\n\n'
             '* "auto" will try to load the weights in the safetensors format '
             'and fall back to the pytorch bin format if safetensors format '
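
For reference, a minimal standalone sketch (not the vLLM source) of the pattern this diff switches to: assuming LoadFormat is a str-valued Enum whose members carry the same format strings that were previously hardcoded, the argparse choices can be derived directly from the enum so the CLI stays in sync with the formats the loader actually supports.

import argparse
import enum


class LoadFormat(str, enum.Enum):
    # Assumed members, mirroring the strings removed from the hardcoded list.
    AUTO = "auto"
    PT = "pt"
    SAFETENSORS = "safetensors"
    NPCACHE = "npcache"
    DUMMY = "dummy"
    TENSORIZER = "tensorizer"
    BITSANDBYTES = "bitsandbytes"


parser = argparse.ArgumentParser()
parser.add_argument(
    "--load-format",
    type=str,
    default=LoadFormat.AUTO.value,
    # Deriving the choices from the enum means a new load format only has to
    # be added in one place (the enum) to become a valid CLI value.
    choices=[f.value for f in LoadFormat],
)

print(parser.parse_args(["--load-format", "safetensors"]).load_format)
# -> safetensors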