|  | 
| 28 | 28 |                          DeviceConfig, DistributedExecutorBackend, | 
| 29 | 29 |                          GuidedDecodingBackend, HfOverrides, KVEventsConfig, | 
| 30 | 30 |                          KVTransferConfig, LoadConfig, LogprobsMode, | 
| 31 |  | -                         LoRAConfig, MambaDType, ModelConfig, ModelDType, | 
| 32 |  | -                         ModelImpl, MultiModalConfig, ObservabilityConfig, | 
| 33 |  | -                         ParallelConfig, PoolerConfig, PrefixCachingHashAlgo, | 
| 34 |  | -                         RunnerOption, SchedulerConfig, SchedulerPolicy, | 
| 35 |  | -                         SpeculativeConfig, TaskOption, TokenizerMode, | 
| 36 |  | -                         VllmConfig, get_attr_docs, get_field) | 
|  | 31 | +                         LoRAConfig, MambaDType, MMEncoderTPMode, ModelConfig, | 
|  | 32 | +                         ModelDType, ModelImpl, MultiModalConfig, | 
|  | 33 | +                         ObservabilityConfig, ParallelConfig, PoolerConfig, | 
|  | 34 | +                         PrefixCachingHashAlgo, RunnerOption, SchedulerConfig, | 
|  | 35 | +                         SchedulerPolicy, SpeculativeConfig, TaskOption, | 
|  | 36 | +                         TokenizerMode, VllmConfig, get_attr_docs, get_field) | 
| 37 | 37 | from vllm.logger import init_logger | 
| 38 | 38 | from vllm.platforms import CpuArchEnum, current_platform | 
| 39 | 39 | from vllm.plugins import load_general_plugins | 
| @@ -352,6 +352,7 @@ class EngineArgs: | 
| 352 | 352 |         MultiModalConfig.mm_processor_kwargs | 
| 353 | 353 |     disable_mm_preprocessor_cache: bool = False  # DEPRECATED | 
| 354 | 354 |     mm_processor_cache_gb: int = MultiModalConfig.mm_processor_cache_gb | 
|  | 355 | +    mm_encoder_tp_mode: MMEncoderTPMode = MultiModalConfig.mm_encoder_tp_mode | 
| 355 | 356 |     skip_mm_profiling: bool = MultiModalConfig.skip_mm_profiling | 
| 356 | 357 |     # LoRA fields | 
| 357 | 358 |     enable_lora: bool = False | 
| @@ -434,16 +435,14 @@ class EngineArgs: | 
| 434 | 435 |     use_tqdm_on_load: bool = LoadConfig.use_tqdm_on_load | 
| 435 | 436 |     pt_load_map_location: str = LoadConfig.pt_load_map_location | 
| 436 | 437 | 
 | 
| 437 |  | -    enable_multimodal_encoder_data_parallel: bool = \ | 
| 438 |  | -        ParallelConfig.enable_multimodal_encoder_data_parallel | 
|  | 438 | +    # DEPRECATED | 
|  | 439 | +    enable_multimodal_encoder_data_parallel: bool = False | 
| 439 | 440 | 
 | 
| 440 | 441 |     logits_processors: Optional[list[Union[ | 
| 441 | 442 |         str, type[LogitsProcessor]]]] = ModelConfig.logits_processors | 
| 442 | 443 |     """Custom logitproc types""" | 
| 443 | 444 | 
 | 
| 444 | 445 |     async_scheduling: bool = SchedulerConfig.async_scheduling | 
| 445 |  | -    # DEPRECATED | 
| 446 |  | -    enable_prompt_adapter: bool = False | 
| 447 | 446 | 
 | 
| 448 | 447 |     kv_sharing_fast_prefill: bool = \ | 
| 449 | 448 |         CacheConfig.kv_sharing_fast_prefill | 
| @@ -685,7 +684,8 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: | 
| 685 | 684 |                                     **parallel_kwargs["worker_extension_cls"]) | 
| 686 | 685 |         parallel_group.add_argument( | 
| 687 | 686 |             "--enable-multimodal-encoder-data-parallel", | 
| 688 |  | -            **parallel_kwargs["enable_multimodal_encoder_data_parallel"]) | 
|  | 687 | +            action="store_true", | 
|  | 688 | +            deprecated=True) | 
| 689 | 689 | 
 | 
| 690 | 690 |         # KV cache arguments | 
| 691 | 691 |         cache_kwargs = get_kwargs(CacheConfig) | 
| @@ -735,6 +735,8 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: | 
| 735 | 735 |         multimodal_group.add_argument("--disable-mm-preprocessor-cache", | 
| 736 | 736 |                                       action="store_true", | 
| 737 | 737 |                                       deprecated=True) | 
|  | 738 | +        multimodal_group.add_argument( | 
|  | 739 | +            "--mm-encoder-tp-mode", **multimodal_kwargs["mm_encoder_tp_mode"]) | 
| 738 | 740 |         multimodal_group.add_argument( | 
| 739 | 741 |             "--interleave-mm-strings", | 
| 740 | 742 |             **multimodal_kwargs["interleave_mm_strings"]) | 
| @@ -909,6 +911,14 @@ def create_model_config(self) -> ModelConfig: | 
| 909 | 911 | 
 | 
| 910 | 912 |             self.mm_processor_cache_gb = envs.VLLM_MM_INPUT_CACHE_GIB | 
| 911 | 913 | 
 | 
|  | 914 | +        if self.enable_multimodal_encoder_data_parallel: | 
|  | 915 | +            logger.warning( | 
|  | 916 | +                "`--enable-multimodal-encoder-data-parallel` is deprecated " | 
|  | 917 | +                "and will be removed in v0.13. " | 
|  | 918 | +                "Please use `--mm-encoder-tp-mode data` instead.") | 
|  | 919 | + | 
|  | 920 | +            self.mm_encoder_tp_mode = "data" | 
|  | 921 | + | 
| 912 | 922 |         return ModelConfig( | 
| 913 | 923 |             model=self.model, | 
| 914 | 924 |             hf_config_path=self.hf_config_path, | 
| @@ -947,6 +957,7 @@ def create_model_config(self) -> ModelConfig: | 
| 947 | 957 |             config_format=self.config_format, | 
| 948 | 958 |             mm_processor_kwargs=self.mm_processor_kwargs, | 
| 949 | 959 |             mm_processor_cache_gb=self.mm_processor_cache_gb, | 
|  | 960 | +            mm_encoder_tp_mode=self.mm_encoder_tp_mode, | 
| 950 | 961 |             override_neuron_config=self.override_neuron_config, | 
| 951 | 962 |             override_pooler_config=self.override_pooler_config, | 
| 952 | 963 |             logits_processor_pattern=self.logits_processor_pattern, | 
| @@ -1258,8 +1269,6 @@ def create_engine_config( | 
| 1258 | 1269 |             distributed_executor_backend=self.distributed_executor_backend, | 
| 1259 | 1270 |             worker_cls=self.worker_cls, | 
| 1260 | 1271 |             worker_extension_cls=self.worker_extension_cls, | 
| 1261 |  | -            enable_multimodal_encoder_data_parallel=self. | 
| 1262 |  | -            enable_multimodal_encoder_data_parallel, | 
| 1263 | 1272 |         ) | 
| 1264 | 1273 | 
 | 
| 1265 | 1274 |         if model_config.is_multimodal_model: | 
|  | 
0 commit comments