Skip to content

Commit

Permalink
Add debug las print & debug las
Browse files Browse the repository at this point in the history
  • Loading branch information
mchen644 committed Oct 21, 2024
1 parent cfa379d commit 4404d66
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 74 deletions.
13 changes: 7 additions & 6 deletions benchmarks/1_serving_benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ result_dir="${pwd}/result"
# scheduler_policy=(infer)
# swap_policies=(partial)
declare -a scheduler_swap_policies
scheduler_swap_policies[0]="tfittradeoff partial"
# scheduler_swap_policies[0]="tfittradeoff partial"
# scheduler_swap_policies[1]="fcfs full"
# scheduler_swap_policies[2]="las full"
scheduler_swap_policies[2]="las full"
# scheduler_swap_policies[1]="tfittradeoff full"
# scheduler_swap_policies[2]="sjf full"
# scheduler_swap_policies[3]="sjmlfq full"
Expand All @@ -37,14 +37,15 @@ scheduler_swap_policies[0]="tfittradeoff partial"

preemption_mode="swap"
gpu_memory_utilization=0.9 # 0.5, 0.7, 0.9
max_num_seqs=128
max_num_seqs=384
swap_space=64
max_tokens=2048
iter_theshold=15
max_serving_time=1000
request_rates[0]=0.5
max_serving_time=1200
# request_rates[0]=0.5
# request_rates[1]=1.0
# request_rates[2]=50.0
request_rates[1]=5.0
# request_rates[2]=10.0
# request_rates[3]=10.0
# request_rates[4]=20.0
# request_rates[5]=50.0
Expand Down
6 changes: 6 additions & 0 deletions benchmarks/result/20241017/0_to_0.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Flatten the duplicated result directories: copy the contents of each nested
# N/N directory up into N/, then remove the now-redundant N/N directory.
# Each removal is chained to its copy with && so that a nested directory is
# deleted only after its copy succeeded. A failed or partial cp (for example
# cp without -r skipping a subdirectory, which yields a non-zero exit status)
# leaves the original files in place instead of losing them.
cp 0/0/* 0/. && rm -r 0/0
cp 1/1/* 1/. && rm -r 1/1
cp 2/2/* 2/. && rm -r 2/2
6 changes: 6 additions & 0 deletions benchmarks/result/20241018/0_to_0.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Flatten the duplicated result directories: copy the contents of each nested
# N/N directory up into N/, then remove the now-redundant N/N directory.
# Each removal is chained to its copy with && so that a nested directory is
# deleted only after its copy succeeded. A failed or partial cp (for example
# cp without -r skipping a subdirectory, which yields a non-zero exit status)
# leaves the original files in place instead of losing them.
cp 0/0/* 0/. && rm -r 0/0
cp 1/1/* 1/. && rm -r 1/1
cp 2/2/* 2/. && rm -r 2/2
101 changes: 43 additions & 58 deletions benchmarks/result/analysis/result_analysis_1.ipynb

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions rebuild_vllm.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# rm -r build
#pip uninstall -y vllm
#taskset -c 14-19 python3 setup.py bdist_wheel --dist-dir=dist
#pip install dist/vllm-0.4.3+cu124-cp310-cp310-linux_x86_64.whl
#pip uninstall -y vllm-flash-attn
#clear
pip uninstall -y vllm
taskset -c 14-19 python3 setup.py bdist_wheel --dist-dir=dist
pip install dist/vllm-0.4.3+cu124-cp310-cp310-linux_x86_64.whl
pip uninstall -y vllm-flash-attn
clear
cd benchmarks
bash 1_serving_benchmark.sh
5 changes: 1 addition & 4 deletions vllm/core/block_manager_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,10 +516,7 @@ def _get_physical_blocks(
for seq in seq_group.get_seqs():
if seq.is_finished():
continue
try:
blocks.update(self.block_tables[seq.seq_id])
except:
print(seq, self.block_tables.keys())
blocks.update(self.block_tables[seq.seq_id])
# Cross-attention blocks
if seq_group.is_encoder_decoder():
blocks.update(self.cross_block_tables[request_id])
Expand Down
35 changes: 34 additions & 1 deletion vllm/core/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1422,16 +1422,20 @@ def _schedule_running_partial(
# Preempt the lowest-priority sequence groups.
if not partial_swapped_flag:
victim_seq_group = running_queue.pop()
print(f"victim seq_group seq status: ", victim_seq_group.get_seqs())

preempted_mode = self._preempt(victim_seq_group,
blocks_to_swap_out,
self.preemption_mode)
print(f"victim seq_group seq status: after _preempt(), {preempted_mode}", victim_seq_group.get_seqs())
self.total_swap_out_blocks += victim_seq_group.total_token_block_size
self.total_swap_out_seqs += 1

if preempted_mode == PreemptionMode.RECOMPUTE:
preempted.add(victim_seq_group)
preempted.add(victim_seq_group)s
else:
if len(victim_seq_group.get_seqs(status=SequenceStatus.WAITING)) != 0:
raise ValueError("111 There should not be any wating seq in the swapped group")
swapped_out.add(victim_seq_group)
else:
# swap out part of the seq_group in the running_queue
Expand Down Expand Up @@ -1502,6 +1506,8 @@ def _schedule_running_partial(
if preempted_mode == PreemptionMode.RECOMPUTE:
preempted.add(victim_seq_group)
else:
if len(victim_seq_group.get_seqs(status=SequenceStatus.WAITING)) != 0:
raise ValueError("222 There should not be any wating seq in the swapped group")
swapped_out.add(victim_seq_group)
else:
# No other sequence groups can be preempted.
Expand All @@ -1514,6 +1520,8 @@ def _schedule_running_partial(
if preempted_mode == PreemptionMode.RECOMPUTE:
preempted.add(seq_group)
else:
if len(seq_group.get_seqs(status=SequenceStatus.WAITING)) != 0:
raise ValueError("333 There should not be any wating seq in the swapped group")
swapped_out.add(seq_group)
else:
victim_seq_group = seq_group
Expand All @@ -1536,6 +1544,8 @@ def _schedule_running_partial(
if preempted_mode == PreemptionMode.RECOMPUTE:
preempted.add(victim_seq_group)
else:
if len(victim_seq_group.get_seqs(status=SequenceStatus.WAITING)) != 0:
raise ValueError("444 There should not be any wating seq in the swapped group")
swapped_out.add(victim_seq_group)
break
else:
Expand All @@ -1546,7 +1556,15 @@ def _schedule_running_partial(

if len(swapped_out) > 0:
total_swapped_out = set(self.swapped)
for i, seq_group in enumerate(swapped_out):
# print(i, seq_group.get_seqs())
if len(seq_group.get_seqs(status=SequenceStatus.WAITING)) != 0:
raise ValueError("555 There should not be any wating seq in the swapped group")
swapped_out = swapped_out.difference(total_swapped_out)
for i, seq_group in enumerate(swapped_out):
# print(i, seq_group.get_seqs())
if len(seq_group.get_seqs(status=SequenceStatus.WAITING)) != 0:
raise ValueError("666 There should not be any wating seq in the swapped group")

self.schedule_running_time += time.time() - now

Expand Down Expand Up @@ -2265,6 +2283,12 @@ def _schedule_chunked_prefill(self):
policy,
# pending_swapped_rate=1,
enable_chunking=True)

# for i, seq_group in enumerate(self.swapped):
# # print(i, seq_group.get_seqs())
# if len(seq_group.get_seqs(status=SequenceStatus.WAITING)) != 0:
# raise ValueError("111 There should not be any wating seq in the swapped group")

# Schedule swapped out requests.
# If preemption happens, it means we don't have space for swap-in.
if len(running_scheduled.preempted) + len(
Expand Down Expand Up @@ -2299,7 +2323,15 @@ def _schedule_chunked_prefill(self):

# Update swapped requests.
self.swapped = remaining_swapped
# for i, seq_group in enumerate(self.swapped):
# print(i, seq_group.get_seqs())
# if len(seq_group.get_seqs(status=SequenceStatus.WAITING)) != 0:
# raise ValueError("222 There should not be any wating seq in the swapped group")
self.swapped.extend(running_scheduled.swapped_out)
# for i, seq_group in enumerate(self.swapped):
# print(i, seq_group.get_seqs())
# if len(seq_group.get_seqs(status=SequenceStatus.WAITING)) != 0:
# raise ValueError("333 There should not be any wating seq in the swapped group")
self.iter_nums += 1

# Motivation:
Expand Down Expand Up @@ -2645,6 +2677,7 @@ def _preempt(
self.num_cumulative_preemption += 1

if preemption_mode == PreemptionMode.RECOMPUTE or not self.block_manager.can_swap_out(seq_group):
preemption_mode = PreemptionMode.RECOMPUTE
self._preempt_by_recompute(seq_group)
elif preemption_mode == PreemptionMode.SWAP:
self._preempt_by_swap(seq_group,
Expand Down

0 comments on commit 4404d66

Please sign in to comment.