Skip to content
This repository was archived by the owner on Oct 11, 2024. It is now read-only.

Commit 28d0d6d

Browse files
DarkLight1337 authored and Robert Shaw committed
[Core] Remove duplicate processing in async engine (vllm-project#5525)
1 parent 4464401 commit 28d0d6d

File tree

1 file changed

+1
-13
lines changed

1 file changed

+1
-13
lines changed

vllm/engine/async_llm_engine.py

Lines changed: 1 addition & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -580,21 +580,9 @@ async def add_request(
580580
if arrival_time is None:
581581
arrival_time = time.time()
582582

583-
if self.engine_use_ray:
584-
processed_inputs = await self.engine.process_model_inputs_async \
585-
.remote( # type: ignore
586-
request_id=request_id,
587-
inputs=inputs,
588-
lora_request=lora_request)
589-
else:
590-
processed_inputs = await self.engine.process_model_inputs_async(
591-
request_id=request_id,
592-
inputs=inputs,
593-
lora_request=lora_request)
594-
595583
stream = self._request_tracker.add_request(
596584
request_id,
597-
inputs=processed_inputs,
585+
inputs=inputs,
598586
params=params,
599587
arrival_time=arrival_time,
600588
lora_request=lora_request,

0 commit comments

Comments (0)