Skip to content

Commit 2c2156b

Browse files
author
Vasilis Kontonis
committed
correct restart count tracking to use original request ID
- Move original_req_id lookup before restart count check
- Use original_req_id for restart count lookup instead of current req_id
- Fixes request ID collisions (was creating duplicate IDs like 0_0_restart_1)
- Fixes restart counter (was always showing restart #1)
- Fixes KV cache leak (duplicate IDs prevented proper cleanup)
- Enhanced debug logging to show both original and current request IDs
1 parent 39cbc5f commit 2c2156b

File tree

1 file changed

+7
-7
lines changed

1 file changed

+7
-7
lines changed

vllm/v1/engine/output_processor.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -597,8 +597,11 @@ def process_outputs(
597597
# 4) Check for restart condition (BlockPhi3 specific)
598598
should_restart = False
599599
if pooling_output is None and self._should_restart(new_token_ids):
600-
# Check restart limits
601-
current_restart_count = self.restart_count.get(req_id, 0)
600+
# Get the original request ID FIRST (in case this is already a restarted request)
601+
original_req_id = self.original_request_id.get(req_id, req_id)
602+
603+
# Check restart limits using ORIGINAL request ID
604+
current_restart_count = self.restart_count.get(original_req_id, 0)
602605
if current_restart_count < self.max_restarts_per_request:
603606
# Extract full generated text
604607
assert req_state.detokenizer is not None
@@ -609,9 +612,6 @@ def process_outputs(
609612
if extraction:
610613
block_content, summary_text = extraction
611614

612-
# Get the original request ID (in case this is already a restarted request)
613-
original_req_id = self.original_request_id.get(req_id, req_id)
614-
615615
# Store this block+summary in accumulated history
616616
if original_req_id not in self.accumulated_blocks:
617617
self.accumulated_blocks[original_req_id] = []
@@ -652,7 +652,7 @@ def process_outputs(
652652
try:
653653
with open("/tmp/blockphi3_restart_debug.log", "a") as f:
654654
f.write(
655-
f"[RESTART] Request {req_id} restart #{current_restart_count + 1}\n"
655+
f"[RESTART] Request {original_req_id} (current: {req_id}) restart #{current_restart_count + 1}\n"
656656
f" Block length: {len(block_content)}\n"
657657
f" Summary length: {len(summary_text)}\n"
658658
f" New prompt tokens: {len(new_prompt_tokens)}\n"
@@ -664,7 +664,7 @@ def process_outputs(
664664
try:
665665
with open("/tmp/blockphi3_restart_debug.log", "a") as f:
666666
f.write(
667-
f"[RESTART_LIMIT] Request {req_id} reached max restarts "
667+
f"[RESTART_LIMIT] Request {original_req_id} (current: {req_id}) reached max restarts "
668668
f"({self.max_restarts_per_request})\n"
669669
)
670670
except Exception:

0 commit comments

Comments
 (0)