| 12 | 12 |                                                cached_tokenizer_from_config) | 
| 13 | 13 | from vllm.utils import ClassRegistry | 
| 14 | 14 |  | 
| 15 |  | -from .cache import (BaseMultiModalProcessorCache, | 
| 16 |  | -                    processor_only_cache_from_config) | 
|  | 15 | +from .cache import BaseMultiModalProcessorCache | 
| 17 | 16 | from .processing import BaseMultiModalProcessor, BaseProcessingInfo | 
| 18 | 17 | from .profiling import (BaseDummyInputsBuilder, DummyDecoderData, | 
| 19 | 18 |                         DummyEncoderData, MultiModalProfiler) | 
| @@ -176,35 +175,6 @@ def get_max_tokens_per_item_by_nonzero_modality( | 
| 176 | 175 |             if mm_limits[key] > 0 | 
| 177 | 176 |         } | 
| 178 | 177 |  | 
| 179 |  | -    # TODO: Remove once V0 is gone | 
| 180 |  | -    def get_max_tokens_by_modality( | 
| 181 |  | -        self, | 
| 182 |  | -        model_config: "ModelConfig", | 
| 183 |  | -    ) -> Mapping[str, int]: | 
| 184 |  | -        """ | 
| 185 |  | -        Get the maximum number of tokens from each modality | 
| 186 |  | -        for profiling the memory usage of a model. | 
| 187 |  | -        """ | 
| 188 |  | -        cache = processor_only_cache_from_config(model_config, self) | 
| 189 |  | -        mm_limits = self.get_mm_limits_per_prompt(model_config, cache=cache) | 
| 190 |  | -        max_tokens_per_item = self.get_max_tokens_per_item_by_modality( | 
| 191 |  | -            model_config, | 
| 192 |  | -            cache=cache, | 
| 193 |  | -        ) | 
| 194 |  | - | 
| 195 |  | -        return { | 
| 196 |  | -            key: mm_limits[key] * max_tokens_per_mm_item | 
| 197 |  | -            for key, max_tokens_per_mm_item in max_tokens_per_item.items() | 
| 198 |  | -        } | 
| 199 |  | - | 
| 200 |  | -    # TODO: Remove once V0 is gone | 
| 201 |  | -    def get_max_multimodal_tokens(self, model_config: "ModelConfig") -> int: | 
| 202 |  | -        """ | 
| 203 |  | -        Get the maximum number of multi-modal tokens | 
| 204 |  | -        for profiling the memory usage of a model. | 
| 205 |  | -        """ | 
| 206 |  | -        return sum(self.get_max_tokens_by_modality(model_config).values()) | 
| 207 |  | - | 
| 208 | 178 |     def get_mm_limits_per_prompt( | 
| 209 | 179 |         self, | 
| 210 | 180 |         model_config: "ModelConfig", | 
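
The two deleted helpers only combined values that the remaining registry methods still expose. As a rough reference (not part of this change), the same totals can be recomputed from those methods; `registry`, `model_config`, and the omission of the processor cache argument are assumptions for illustration, not names or behavior introduced by this diff:

```python
# Sketch: reproduce the removed per-modality token totals from the helpers
# that remain on the registry. `registry` (a MultiModalRegistry) and
# `model_config` (a ModelConfig) are assumed to exist; the processor cache
# the deleted code constructed is omitted here for brevity.
mm_limits = registry.get_mm_limits_per_prompt(model_config)
max_tokens_per_item = registry.get_max_tokens_per_item_by_modality(model_config)

# Per-modality total = per-item maximum * number of items allowed per prompt,
# mirroring the dict comprehension in the deleted get_max_tokens_by_modality().
max_tokens_by_modality = {
    modality: mm_limits[modality] * per_item
    for modality, per_item in max_tokens_per_item.items()
}

# The deleted get_max_multimodal_tokens() simply summed these values.
max_multimodal_tokens = sum(max_tokens_by_modality.values())
```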