[Bugfix] Use LoadFormat values for vllm serve --load-format (#7784)
mgoin authored Aug 22, 2024
1 parent 57792ed · commit 15310b5
Showing 1 changed file with 3 additions and 6 deletions.
9 changes: 3 additions & 6 deletions vllm/engine/arg_utils.py
@@ -9,8 +9,8 @@

 import vllm.envs as envs
 from vllm.config import (CacheConfig, DecodingConfig, DeviceConfig,
-                         EngineConfig, LoadConfig, LoRAConfig, ModelConfig,
-                         ObservabilityConfig, ParallelConfig,
+                         EngineConfig, LoadConfig, LoadFormat, LoRAConfig,
+                         ModelConfig, ObservabilityConfig, ParallelConfig,
                          PromptAdapterConfig, SchedulerConfig,
                          SpeculativeConfig, TokenizerPoolConfig)
 from vllm.executor.executor_base import ExecutorBase
@@ -214,10 +214,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             '--load-format',
             type=str,
             default=EngineArgs.load_format,
-            choices=[
-                'auto', 'pt', 'safetensors', 'npcache', 'dummy', 'tensorizer',
-                'bitsandbytes'
-            ],
+            choices=[f.value for f in LoadFormat],
             help='The format of the model weights to load.\n\n'
             '* "auto" will try to load the weights in the safetensors format '
             'and fall back to the pytorch bin format if safetensors format '
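The change works because LoadFormat in vllm.config is a string-backed Enum, so iterating it yields every supported format and .value gives the CLI-facing string, keeping the --load-format choices in sync with the config instead of a hand-maintained list. Below is a minimal, self-contained sketch of the same pattern; the enum members and default shown here are illustrative, not the full LoadFormat definition.

# Sketch of the pattern this commit adopts: derive argparse choices from a
# str-valued Enum so the CLI stays in sync with the config definition.
# The enum members below are illustrative, not the complete LoadFormat.
import argparse
import enum


class LoadFormat(str, enum.Enum):
    AUTO = "auto"
    PT = "pt"
    SAFETENSORS = "safetensors"
    DUMMY = "dummy"


parser = argparse.ArgumentParser()
parser.add_argument(
    '--load-format',
    type=str,
    default=LoadFormat.AUTO.value,
    # Enumerating the enum means newly added formats appear automatically.
    choices=[f.value for f in LoadFormat],
    help='The format of the model weights to load.')

args = parser.parse_args(['--load-format', 'safetensors'])
print(args.load_format)  # -> safetensors

With this approach, adding a new member to LoadFormat is enough to expose it on the command line; previously the hardcoded choices list had to be updated separately and could drift out of date.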
