Skip to content

Commit

Permalink
improve chunked prefill performance
Browse files Browse the repository at this point in the history
[Bugfix] Fix vllm-project#7592: with enable_chunked_prefill, vllm 0.5.4 throughput is slightly lower than in 0.5.0–0.5.3. (vllm-project#7874)
  • Loading branch information
noooop authored and gongdao123 committed Oct 17, 2024
1 parent 5d90bd1 commit 44997d2
Showing 1 changed file with 10 additions and 5 deletions.
15 changes: 10 additions & 5 deletions vllm/core/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -883,16 +883,21 @@ def _schedule_chunked_prefill(self):

# Update waiting requests.
self.waiting.extendleft(running_scheduled.preempted)

# Update new running requests.
self.running.extend([s.seq_group for s in prefills.seq_groups])
self.running.extend(
[s.seq_group for s in running_scheduled.decode_seq_groups])
self.running.extend(
[s.seq_group for s in running_scheduled.prefill_seq_groups])
# By default, vLLM scheduler prioritizes prefills.
# Once chunked prefill is enabled,
# the policy is changed to prioritize decode requests.
self.running.extend(
[s.seq_group for s in swapped_in.decode_seq_groups])
self.running.extend(
[s.seq_group for s in swapped_in.prefill_seq_groups])
self.running.extend(
[s.seq_group for s in running_scheduled.decode_seq_groups])
self.running.extend(
[s.seq_group for s in running_scheduled.prefill_seq_groups])
self.running.extend([s.seq_group for s in prefills.seq_groups])

# Update swapped requests.
self.swapped.extend(running_scheduled.swapped_out)
return SchedulerOutputs(
Expand Down

0 comments on commit 44997d2

Please sign in to comment.