We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 3e0b7c2 · commit 22b4a66 · Copy full SHA for 22b4a66
vllm/engine/arg_utils.py
@@ -903,6 +903,11 @@ def create_engine_config(self) -> EngineConfig:
903
"--enable-prefix-caching is currently not "
904
"supported for multimodal models and has been disabled.")
905
self.enable_prefix_caching = False
906
+ if model_config.is_encoder_decoder_model:
907
+ logger.warning(
908
+ "Block Manager v2 does not support encoder-decoder models"
909
+ " currently. Using Block Manager v1 as fallback.")
910
+ self.use_v2_block_manager = False
911
912
cache_config = CacheConfig(
913
block_size=self.block_size if self.device != "neuron" else
0 commit comments