File tree Expand file tree Collapse file tree 2 files changed +9
-5
lines changed
tests/v1/kv_connector/nixl_integration Expand file tree Collapse file tree 2 files changed +9
-5
lines changed Original file line number Diff line number Diff line change 44# Models to run
55MODELS=(
66 " Qwen/Qwen3-0.6B"
7- # "deepseek-ai/deepseek-vl2-tiny"
7+ " deepseek-ai/deepseek-vl2-tiny"
88)
99
1010# Number of prefill and decode instances to create
Original file line number Diff line number Diff line change @@ -301,16 +301,20 @@ def allocate_slots(
301301 if not self .enable_caching :
302302 return KVCacheBlocks (new_blocks )
303303
304- if not delay_cache_blocks :
304+ if delay_cache_blocks :
305+ # P/D: delay caching the blocks if we need to wait for the
306+ # KVs to be recved from remote, but update num_cached_block
307+ # with the prefix cache hits to avoid double caching later.
308+ assert request .request_id not in self .num_cached_block
309+ self .num_cached_block [request .request_id ] = len (
310+ new_computed_block_list )
311+ else :
305312 self .cache_blocks (
306313 request = request ,
307314 num_tokens = num_tokens ,
308315 num_computed_tokens = num_computed_tokens ,
309316 new_computed_block_list = new_computed_block_list ,
310317 )
311- else :
312- self .num_cached_block [request .request_id ] = len (
313- new_computed_block_list )
314318
315319 return KVCacheBlocks (new_blocks )
316320
You can’t perform that action at this time.
0 commit comments