1 file changed: 13 additions, 16 deletions.
Columns: original file line number, diff line number, diff line change.
@@ -103,25 +103,22 @@ def run_vllm(
103103 )
104104
105105 # Add the requests to the engine.
106+ prompts = []
107+ sampling_params = []
106108 for prompt , _ , output_len in requests :
107- sampling_params = SamplingParams (
108- n = n ,
109- temperature = 0.0 if use_beam_search else 1.0 ,
110- top_p = 1.0 ,
111- use_beam_search = use_beam_search ,
112- ignore_eos = True ,
113- max_tokens = output_len ,
114- )
115- # FIXME(woosuk): Do not use internal method.
116- llm ._add_request (
117- prompt = prompt ,
118- prompt_token_ids = None ,
119- sampling_params = sampling_params ,
120- )
109+ prompts .append (prompt )
110+ sampling_params .append (
111+ SamplingParams (
112+ n = n ,
113+ temperature = 0.0 if use_beam_search else 1.0 ,
114+ top_p = 1.0 ,
115+ use_beam_search = use_beam_search ,
116+ ignore_eos = True ,
117+ max_tokens = output_len ,
118+ ))
121119
122120 start = time .perf_counter ()
123- # FIXME(woosuk): Do not use internal method.
124- llm ._run_engine (use_tqdm = True )
121+ llm .generate (prompts , sampling_params , use_tqdm = True )
125122 end = time .perf_counter ()
126123 return end - start
127124
You can’t perform that action at this time.
0 commit comments