Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions tests/v1/core/test_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1917,7 +1917,7 @@ def test_priority_scheduling_preemption_when_out_of_kv():
def test_chunked_prefill_disabled_for_encoder_decoder(
enable_chunked_prefill: bool, is_encoder_decoder: bool,
expect_enabled: bool) -> None:
"""Validate that chunked prefill is appropriately disabled for
"""Validate that chunked prefill is appropriately disabled for
encoder-decoder models."""
scheduler_config = SchedulerConfig(
enable_chunked_prefill=enable_chunked_prefill,
Expand All @@ -1942,7 +1942,7 @@ def test_chunked_prefill_disabled_for_encoder_decoder(
def _validate_chunked_prefill_settings_for_encoder_decoder(
scheduler_config: SchedulerConfig, is_encoder_decoder: bool,
expect_enabled: bool) -> None:
"""Validate chunked prefill settings in the scheduler config for
"""Validate chunked prefill settings in the scheduler config for
encoder-decoder models."""
assert scheduler_config.chunked_prefill_enabled is expect_enabled
assert scheduler_config.enable_chunked_prefill is expect_enabled
Expand Down
17 changes: 12 additions & 5 deletions vllm/config/vllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,10 +396,17 @@ def __post_init__(self):
"try setting 'VLLM_WORKER_MULTIPROC_METHOD' "
"to 'spawn'.")

# Disable prefix caching only if chunked prefill is explicitly disabled
# (and not merely unset)
if (self.scheduler_config.chunked_prefill_enabled is False
or disable_chunked_prefill_reasons):
# Final off-switch for chunked prefill (CP) and prefix caching (APC):
# Disable for (a) collected blockers, (b) encoder–decoder, or
# (c) explicit CP=False when APC wasn't requested.
# Do NOT disable merely because the resolved CP flag is False.
apc_requested = (self.cache_config is not None
and self.cache_config.enable_prefix_caching)
if (disable_chunked_prefill_reasons
or (self.model_config is not None
and self.model_config.is_encoder_decoder)
or (self.scheduler_config.enable_chunked_prefill is False
Copy link
Collaborator

@noooop noooop Oct 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Honestly, since I can only look at the code on my phone, I have a ton of questions about this part of the logic.

LGTM if @DarkLight1337 approves. Or, I’ll give it a closer look when I’m back on the 8th.

and not apc_requested)):
for reason in disable_chunked_prefill_reasons:
logger.info(reason)
self.scheduler_config.chunked_prefill_enabled = False
Expand Down Expand Up @@ -668,7 +675,7 @@ def try_verify_and_update_config(self):
f"Model: {self.model_config.model}")

def compile_debug_dump_path(self) -> Optional[Path]:
"""Returns a rank-aware path for dumping
"""Returns a rank-aware path for dumping
torch.compile debug information.
"""
if self.compilation_config.debug_dump_path is None:
Expand Down