Commit 557c96b

add global dummy lists to avoid creating a dummy list on every prepare_input_tensors call
1 parent cee7918 commit 557c96b

File tree

1 file changed: +15 −3 lines

vllm/worker/habana_model_runner.py

Lines changed: 15 additions & 3 deletions
@@ -488,6 +488,8 @@ class HabanaModelRunnerBase(ModelRunnerBase[TModelInputForHPU]):
     Helper class for shared methods between GPU model runners.
     """
     _model_input_cls: Type[TModelInputForHPU]
+    dummy_prompt_list: List[SequenceGroupMetadata]
+    dummy_decode_list: List[SequenceGroupMetadata]
 
     def __init__(
         self,
@@ -1087,9 +1089,16 @@ def prepare_input_tensors(
         batch_size_padded = find_bucket(real_batch_size, bucket_cfg)
         batch_size_padding = batch_size_padded - real_batch_size
         seq_group_metadata_list = seq_group_metadata_list.copy()
-        seq_group_metadata_list.extend(
-            self.create_dummy_seq_group_metadata(0, 0, is_prompt)
-            for _ in range(batch_size_padding))
+
+        if is_prompt:
+            seq_group_metadata_list.extend(
+                self.dummy_prompt_list
+                for _ in range(batch_size_padding))
+        else:
+            seq_group_metadata_list.extend(
+                self.dummy_decode_list
+                for _ in range(batch_size_padding))
+
 
         prefill_reqs = []
         decode_reqs = []
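
The rewritten padding step reuses metadata cached on the runner instead of calling create_dummy_seq_group_metadata for every padded slot. A minimal sketch of that bucket-padding pattern under simplified assumptions — DummyMeta, BUCKETS, and pad_batch are illustrative stand-ins, not the vllm API:

# Sketch of the pattern in this commit: pad a batch up to a bucketed
# size by reusing a dummy element created once, rather than rebuilding
# it on every call. All names here are hypothetical.
from dataclasses import dataclass
from typing import List

BUCKETS = [1, 2, 4, 8, 16, 32, 64]  # hypothetical batch-size buckets

@dataclass
class DummyMeta:
    seq_len: int
    is_prompt: bool

def find_bucket(n: int, buckets: List[int]) -> int:
    # Smallest bucket that fits n; mirrors the find_bucket call above.
    return next(b for b in buckets if b >= n)

# Created once (the commit does this at the end of profile_run),
# then reused on every prepare_input_tensors call.
DUMMY_PROMPT = DummyMeta(seq_len=0, is_prompt=True)
DUMMY_DECODE = DummyMeta(seq_len=0, is_prompt=False)

def pad_batch(batch: List[DummyMeta], is_prompt: bool) -> List[DummyMeta]:
    padded_size = find_bucket(len(batch), BUCKETS)
    padding = padded_size - len(batch)
    dummy = DUMMY_PROMPT if is_prompt else DUMMY_DECODE
    # Every padding slot shares the same read-only dummy object; that
    # sharing is what saves the per-call construction cost.
    return batch + [dummy] * padding

print(len(pad_batch([DummyMeta(5, True)] * 3, True)))  # 3 -> bucket 4
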
@@ -1292,6 +1301,9 @@ def profile_run(self) -> None:
                        self.max_num_batched_tokens // max_batch_size)
 
         self.warmup_scenario(max_batch_size, max_seq_len, True, kv_caches)
+        self.dummy_prompt_list = self.create_dummy_seq_group_metadata(0, 0, 1)
+        self.dummy_decode_list = self.create_dummy_seq_group_metadata(0, 0, 0)
+
         return
 
     def warmup_scenario(self,
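
The cached lists are populated once at the end of profile_run, so they exist before the first real batch is prepared. A hedged sketch of that initialize-once-then-reuse lifecycle, with Runner and make_dummy as hypothetical stand-ins for HabanaModelRunnerBase and create_dummy_seq_group_metadata:

# Hypothetical lifecycle sketch; not the real vllm classes.
from typing import List, Optional

class Runner:
    def __init__(self) -> None:
        # Populated once during profiling, reused on every batch after.
        self.dummy_prompt_list: Optional[List[dict]] = None
        self.dummy_decode_list: Optional[List[dict]] = None

    def make_dummy(self, is_prompt: bool) -> List[dict]:
        # Stand-in for create_dummy_seq_group_metadata(0, 0, is_prompt).
        return [{"seq_len": 0, "is_prompt": is_prompt}]

    def profile_run(self) -> None:
        # ... warmup work elided ...
        self.dummy_prompt_list = self.make_dummy(True)
        self.dummy_decode_list = self.make_dummy(False)

    def prepare_input_tensors(self, batch: List[dict], is_prompt: bool,
                              padding: int) -> List[dict]:
        cached = (self.dummy_prompt_list if is_prompt
                  else self.dummy_decode_list)
        assert cached is not None, "profile_run must run first"
        return batch + cached * padding

runner = Runner()
runner.profile_run()
print(len(runner.prepare_input_tensors([], True, 3)))  # -> 3
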
