@@ -76,6 +76,9 @@ def __init__(
76
76
# KV Connector pushes/pulls remote KVs for P/D and offloading.
77
77
self .connector = None
78
78
if self .vllm_config .kv_transfer_config is not None :
79
+ assert len (self .kv_cache_config .kv_cache_groups ) == 1 , (
80
+ "Multiple KV cache groups are not currently supported "
81
+ "with KV connectors" )
79
82
self .connector = KVConnectorFactory .create_connector_v1 (
80
83
config = self .vllm_config , role = KVConnectorRole .SCHEDULER )
81
84
@@ -985,9 +988,8 @@ def _connector_finished(
985
988
"""
986
989
if self .connector is None :
987
990
return False , None
988
- assert len (self .kv_cache_config .kv_cache_groups
989
- ) == 1 , "KV connector only supports one KV cache group now"
990
- block_ids = self .kv_cache_manager .get_block_ids (request .request_id )[0 ]
991
+
992
+ (block_ids , ) = self .kv_cache_manager .get_block_ids (request .request_id )
991
993
return self .connector .request_finished (request , block_ids )
992
994
993
995
def _update_waiting_for_remote_kv (self , request : Request ) -> bool :
@@ -1002,12 +1004,12 @@ def _update_waiting_for_remote_kv(self, request: Request) -> bool:
1002
1004
and the request state will be moved back to WAITING from
1003
1005
WAITING_FOR_REMOTE_KV.
1004
1006
"""
1007
+ assert self .connector is not None
1005
1008
if request .request_id not in self .finished_recving_kv_req_ids :
1006
1009
return False
1007
- assert len (self .kv_cache_config .kv_cache_groups
1008
- ) == 1 , "KV connector only supports one KV cache group now"
1010
+
1009
1011
# Now that the blocks are ready, actually cache them.
1010
- block_ids = self .kv_cache_manager .get_block_ids (request .request_id )[ 0 ]
1012
+ ( block_ids , ) = self .kv_cache_manager .get_block_ids (request .request_id )
1011
1013
num_computed_tokens = len (block_ids ) * self .block_size
1012
1014
# Handle the case where the number of request tokens is less than one block.
1013
1015
num_computed_tokens = min (num_computed_tokens , request .num_tokens )
0 commit comments