 from vllm.executor.executor_base import ExecutorAsyncBase
 from vllm.executor.gpu_executor import GPUExecutorAsync
 from vllm.executor.ray_utils import initialize_ray_cluster
-from vllm.inputs import PromptType
+from vllm.inputs import PromptInputs
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
 from vllm.model_executor.layers.sampler import SamplerOutput
@@ -405,7 +405,7 @@ async def stop_remote_worker_execution_loop_async(self) -> None:
     async def add_request_async(
         self,
         request_id: str,
-        prompt: PromptType,
+        inputs: PromptInputs,
         params: Union[SamplingParams, PoolingParams],
         arrival_time: Optional[float] = None,
         lora_request: Optional[LoRARequest] = None,
@@ -420,7 +420,7 @@ async def add_request_async(
             arrival_time = time.time()
 
         preprocessed_inputs = await self.input_preprocessor.preprocess_async(
-            prompt,
+            inputs,
             request_id=request_id,
             lora_request=lora_request,
             prompt_adapter_request=prompt_adapter_request,
@@ -777,7 +777,7 @@ async def run_engine_loop(engine_ref: ReferenceType):
     async def add_request(
         self,
         request_id: str,
-        prompt: PromptType,
+        inputs: PromptInputs,
         params: Union[SamplingParams, PoolingParams],
         arrival_time: Optional[float] = None,
         lora_request: Optional[LoRARequest] = None,
@@ -797,7 +797,7 @@ async def add_request(
         stream = self._request_tracker.add_request(
             request_id,
             verbose=self.log_requests,
-            prompt=prompt,
+            inputs=inputs,
             params=params,
             arrival_time=arrival_time or time.time(),
             lora_request=lora_request,
@@ -808,7 +808,7 @@ async def add_request(
 
     async def generate(
         self,
-        prompt: PromptType,
+        inputs: PromptInputs,
         sampling_params: SamplingParams,
         request_id: str,
         lora_request: Optional[LoRARequest] = None,
@@ -822,7 +822,8 @@ async def generate(
         from the LLMEngine to the caller.
 
         Args:
-            prompt: The prompt to the LLM. See :class:`~vllm.inputs.PromptType`
+            inputs: The inputs to the LLM. See
+                :class:`~vllm.inputs.PromptInputs`
                 for more details about the format of each input.
             sampling_params: The sampling parameters of the request.
             request_id: The unique id of the request.
@@ -880,7 +881,7 @@ async def generate(
         """
         async for output in await self.add_request(
             request_id,
-            prompt,
+            inputs,
             sampling_params,
             lora_request=lora_request,
             trace_headers=trace_headers,
@@ -890,7 +891,7 @@ async def generate(
 
     async def encode(
         self,
-        prompt: PromptType,
+        inputs: PromptInputs,
         pooling_params: PoolingParams,
         request_id: str,
         lora_request: Optional[LoRARequest] = None,
@@ -903,7 +904,8 @@ async def encode(
         from the LLMEngine to the caller.
 
        Args:
-            prompt: The prompt to the LLM. See :class:`~vllm.inputs.PromptType`
+            inputs: The inputs to the LLM. See
+                :class:`~vllm.inputs.PromptInputs`
                 for more details about the format of each input.
             pooling_params: The pooling parameters of the request.
             request_id: The unique id of the request.
@@ -957,7 +959,7 @@ async def encode(
         """
        async for output in await self.add_request(
             request_id,
-            prompt,
+            inputs,
             pooling_params,
             lora_request=lora_request,
             trace_headers=trace_headers,
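
For context, a minimal caller-side sketch of how this rename surfaces to users of AsyncLLMEngine.generate(). This is not part of the commit; the model name and request id are illustrative, and only keyword callers (generate(prompt=...) becoming generate(inputs=...)) need to change, since the argument stays first positionally.

# Caller-side sketch, assuming a small illustrative model.
import asyncio

from vllm import AsyncEngineArgs, AsyncLLMEngine, SamplingParams

async def main() -> None:
    engine = AsyncLLMEngine.from_engine_args(
        AsyncEngineArgs(model="facebook/opt-125m"))
    params = SamplingParams(max_tokens=32)
    final = None
    # After this change, the first argument of generate() is `inputs`
    # (a PromptInputs) rather than `prompt` (a PromptType).
    async for output in engine.generate("Hello, my name is",
                                        params,
                                        request_id="req-0"):
        final = output  # each iteration yields the latest RequestOutput
    if final is not None:
        print(final.outputs[0].text)

asyncio.run(main())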