From f8e0edbe0f7394a3bd81875e791521deba48ec52 Mon Sep 17 00:00:00 2001
From: Cyrus Leung
Date: Sat, 15 Jun 2024 01:04:42 +0800
Subject: [PATCH] [Core] Remove duplicate processing in async engine (#5525)

---
 vllm/engine/async_llm_engine.py | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py
index bd50bc557d9dd..467dfc2db4180 100644
--- a/vllm/engine/async_llm_engine.py
+++ b/vllm/engine/async_llm_engine.py
@@ -583,21 +583,9 @@ async def add_request(
         if arrival_time is None:
             arrival_time = time.time()
 
-        if self.engine_use_ray:
-            processed_inputs = await self.engine.process_model_inputs_async \
-                .remote(  # type: ignore
-                    request_id=request_id,
-                    inputs=inputs,
-                    lora_request=lora_request)
-        else:
-            processed_inputs = await self.engine.process_model_inputs_async(
-                request_id=request_id,
-                inputs=inputs,
-                lora_request=lora_request)
-
         stream = self._request_tracker.add_request(
             request_id,
-            inputs=processed_inputs,
+            inputs=inputs,
             params=params,
             arrival_time=arrival_time,
             lora_request=lora_request,