@@ -396,10 +396,17 @@ def __post_init__(self):
396396 "try setting 'VLLM_WORKER_MULTIPROC_METHOD' "
397397 "to 'spawn'." )
398398
399- # Disable prefix caching only if chunked prefill is explicitly disabled
400- # (and not merely unset)
401- if (self .scheduler_config .chunked_prefill_enabled is False
402- or disable_chunked_prefill_reasons ):
399+ # Final off-switch for chunked prefill (CP) and prefix caching (APC).
400+ # Disable when (a) any blocker reasons were collected, (b) the model is
401+ # encoder-decoder, or (c) CP was explicitly set to False and APC was not
402+ # requested. Do NOT disable merely because the resolved CP flag is False.
403+ apc_requested = (self .cache_config is not None
404+ and self .cache_config .enable_prefix_caching )
405+ if (disable_chunked_prefill_reasons
406+ or (self .model_config is not None
407+ and self .model_config .is_encoder_decoder )
408+ or (self .scheduler_config .enable_chunked_prefill is False
409+ and not apc_requested )):
403410 for reason in disable_chunked_prefill_reasons :
404411 logger .info (reason )
405412 self .scheduler_config .chunked_prefill_enabled = False
@@ -668,7 +675,7 @@ def try_verify_and_update_config(self):
668675 f"Model: { self .model_config .model } " )
669676
670677 def compile_debug_dump_path (self ) -> Optional [Path ]:
671- """Returns a rank-aware path for dumping
678+ """Returns a rank-aware path for dumping
672679 torch.compile debug information.
673680 """
674681 if self .compilation_config .debug_dump_path is None :
0 commit comments