|
20 | 20 | from vllm.executor.executor_base import ExecutorBase
|
21 | 21 | from vllm.logger import init_logger
|
22 | 22 | from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
|
| 23 | +from vllm.plugins import load_general_plugins |
23 | 24 | from vllm.transformers_utils.utils import check_gguf_file
|
24 | 25 | from vllm.usage.usage_lib import UsageContext
|
25 | 26 | from vllm.utils import FlexibleArgumentParser, StoreBoolean
|
@@ -203,6 +204,8 @@ class EngineArgs:
|
203 | 204 |
|
204 | 205 | calculate_kv_scales: Optional[bool] = None
|
205 | 206 |
|
| 207 | + additional_config: Optional[Dict[str, Any]] = None |
| 208 | + |
206 | 209 | def __post_init__(self):
|
207 | 210 | if not self.tokenizer:
|
208 | 211 | self.tokenizer = self.model
|
@@ -984,6 +987,14 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
|
984 | 987 | 'be loaded from the model checkpoint if available. '
|
985 | 988 | 'Otherwise, the scales will default to 1.0.')
|
986 | 989 |
|
| 990 | + parser.add_argument( |
| 991 | + "--additional-config", |
| 992 | + type=json.loads, |
| 993 | + default=None, |
| 994 | + help="Additional config for specified platform in JSON format. " |
| 995 | + "Different platforms may support different configs. Make sure the " |
| 996 | + "configs are valid for the platform you are using. The input format" |
| 997 | + " is like '{\"config_key\":\"config_value\"}'") |
987 | 998 | return parser
|
988 | 999 |
|
989 | 1000 | @classmethod
|
@@ -1044,6 +1055,9 @@ def create_load_config(self) -> LoadConfig:
|
1044 | 1055 | def create_engine_config(self,
|
1045 | 1056 | usage_context: Optional[UsageContext] = None
|
1046 | 1057 | ) -> VllmConfig:
|
| 1058 | + from vllm.platforms import current_platform |
| 1059 | + current_platform.pre_register_and_update() |
| 1060 | + |
1047 | 1061 | if envs.VLLM_USE_V1:
|
1048 | 1062 | self._override_v1_engine_args(usage_context)
|
1049 | 1063 |
|
@@ -1287,6 +1301,7 @@ def create_engine_config(self,
|
1287 | 1301 | prompt_adapter_config=prompt_adapter_config,
|
1288 | 1302 | compilation_config=self.compilation_config,
|
1289 | 1303 | kv_transfer_config=self.kv_transfer_config,
|
| 1304 | + additional_config=self.additional_config, |
1290 | 1305 | )
|
1291 | 1306 |
|
1292 | 1307 | if envs.VLLM_USE_V1:
|
@@ -1347,6 +1362,12 @@ def add_cli_args(parser: FlexibleArgumentParser,
|
1347 | 1362 | parser.add_argument('--disable-log-requests',
|
1348 | 1363 | action='store_true',
|
1349 | 1364 | help='Disable logging requests.')
|
| 1365 | + # Initialize plugins so they can update the parser. For example, a plugin |
| 1366 | + # may add a new kind of quantization method to the --quantization argument |
| 1367 | + # or a new device to the --device argument. |
| 1368 | + load_general_plugins() |
| 1369 | + from vllm.platforms import current_platform |
| 1370 | + current_platform.pre_register_and_update(parser) |
1350 | 1371 | return parser
|
1351 | 1372 |
|
1352 | 1373 |
|
|
0 commit comments