@@ -396,10 +396,15 @@ def __post_init__(self):
396396 "try setting 'VLLM_WORKER_MULTIPROC_METHOD' "
397397 "to 'spawn'." )
398398
399- # Disable prefix caching only if chunked prefill is explicitly disabled
400- # (and not merely unset)
401- if (self .scheduler_config .chunked_prefill_enabled is False
402- or disable_chunked_prefill_reasons ):
399+ # Final off-switch for chunked prefill (CP) and prefix caching (APC):
400+ # disable when (a) any blocking reasons were collected, or
401+ # (b) CP was explicitly set to False and APC was not requested.
402+ # Do NOT disable merely because the resolved CP flag is False.
403+ apc_requested = (self .cache_config is not None
404+ and self .cache_config .enable_prefix_caching )
405+ if (disable_chunked_prefill_reasons
406+ or (self .scheduler_config .enable_chunked_prefill is False
407+ and not apc_requested )):
403408 for reason in disable_chunked_prefill_reasons :
404409 logger .info (reason )
405410 self .scheduler_config .chunked_prefill_enabled = False
@@ -668,7 +673,7 @@ def try_verify_and_update_config(self):
668673 f"Model: { self .model_config .model } " )
669674
670675 def compile_debug_dump_path (self ) -> Optional [Path ]:
671- """Returns a rank-aware path for dumping
676+ """Returns a rank-aware path for dumping
672677 torch.compile debug information.
673678 """
674679 if self .compilation_config .debug_dump_path is None :
0 commit comments