Skip to content

Commit 5c3fc88

Browse files
correctness!
Signed-off-by: rshaw@neuralmagic.com <robertgshaw2@gmail.com>
1 parent 9e7ee72 commit 5c3fc88

File tree

2 files changed

+9
-5
lines changed

2 files changed

+9
-5
lines changed

tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ set -xe
44
# Models to run
55
MODELS=(
66
"Qwen/Qwen3-0.6B"
7-
# "deepseek-ai/deepseek-vl2-tiny"
7+
"deepseek-ai/deepseek-vl2-tiny"
88
)
99

1010
# Number of prefill and decode instances to create

vllm/v1/core/kv_cache_manager.py

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -301,16 +301,20 @@ def allocate_slots(
301301
if not self.enable_caching:
302302
return KVCacheBlocks(new_blocks)
303303

304-
if not delay_cache_blocks:
304+
if delay_cache_blocks:
305+
# P/D: delay caching the blocks if we need to wait for the
306+
# KVs to be recved from remote, but update num_cached_block
307+
# with the prefix cache hits to avoid double caching later.
308+
assert request.request_id not in self.num_cached_block
309+
self.num_cached_block[request.request_id] = len(
310+
new_computed_block_list)
311+
else:
305312
self.cache_blocks(
306313
request=request,
307314
num_tokens=num_tokens,
308315
num_computed_tokens=num_computed_tokens,
309316
new_computed_block_list=new_computed_block_list,
310317
)
311-
else:
312-
self.num_cached_block[request.request_id] = len(
313-
new_computed_block_list)
314318

315319
return KVCacheBlocks(new_blocks)
316320

0 commit comments

Comments
 (0)