File tree Expand file tree Collapse file tree 1 file changed +14
-1
lines changed
vllm/distributed/kv_transfer/kv_connector/v1 Expand file tree Collapse file tree 1 file changed +14
-1
lines changed Original file line number Diff line number Diff line change @@ -298,8 +298,21 @@ def request_finished(
298
298
logger .debug (
299
299
"NIXLConnector request_finished, request_status=%s, "
300
300
"kv_transfer_params=%s" , request .status , params )
301
+ if not params :
302
+ return False , None
303
+
304
+ if params .get ("do_remote_prefill" ):
305
+ # If do_remote_prefill is still True when the request is finished,
306
+ # update_state_after_alloc must not have been called (the request
307
+ # must have been aborted before it was scheduled).
308
+ # To avoid stranding the prefill blocks in the prefill instance,
309
+ # we must add empty block_ids to _reqs_need_recv so that our
310
+ # worker side will notify and free blocks in the prefill instance.
311
+ self ._reqs_need_recv [request .request_id ] = (request , [])
312
+ params ["do_remote_prefill" ] = False
313
+ return False , None
301
314
302
- if (params is None or not params .get ("do_remote_decode" )
315
+ if (not params .get ("do_remote_decode" )
303
316
or request .status != RequestStatus .FINISHED_LENGTH_CAPPED ):
304
317
return False , None
305
318
You can’t perform that action at this time.
0 commit comments