Skip to content

Commit

Permalink
[Core] Remove duplicate processing in async engine (vllm-project#5525)
Browse files: browse the repository at this point in the history
  • Loading branch information
DarkLight1337 authored and jimpang committed Jul 24, 2024
1 parent f216f46 commit f8e0edb
Showing 1 changed file with 1 addition and 13 deletions.
14 changes: 1 addition & 13 deletions vllm/engine/async_llm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,21 +583,9 @@ async def add_request(
  if arrival_time is None:
  arrival_time = time.time()

- if self.engine_use_ray:
- processed_inputs = await self.engine.process_model_inputs_async \
- .remote( # type: ignore
- request_id=request_id,
- inputs=inputs,
- lora_request=lora_request)
- else:
- processed_inputs = await self.engine.process_model_inputs_async(
- request_id=request_id,
- inputs=inputs,
- lora_request=lora_request)
-
  stream = self._request_tracker.add_request(
  request_id,
- inputs=processed_inputs,
+ inputs=inputs,
  params=params,
  arrival_time=arrival_time,
  lora_request=lora_request,
Expand Down

0 comments on commit f8e0edb

Please sign in to comment.