Skip to content

Commit b25cfab

Browse files
authored
[V1] Avoid sending text prompt to core engine (vllm-project#11963)
Signed-off-by: Roger Wang <ywang@roblox.com>
1 parent 4b657d3 commit b25cfab

File tree

2 files changed

+8
-2
lines changed

2 files changed

+8
-2
lines changed

vllm/v1/engine/__init__.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ class EngineCoreRequest:
1919
# due to circular imports and typing we have in data.py
2020

2121
request_id: str
22-
#NOTE(Nick): I don't think we need to pass prompt here since it should
23-
# always be tokenized?
22+
# NOTE(ywang96): the original text prompt is needed when a request is added to
23+
# the Detokenizer, but it is set to None when the request is added to EngineCoreClient.
2424
prompt: Optional[str]
2525
prompt_token_ids: List[int]
2626
mm_inputs: Optional[List[Optional["MultiModalKwargs"]]]

vllm/v1/engine/core_client.py

+6
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,9 @@ def _send_input(self, request_type: EngineCoreRequestType,
219219
self.input_socket.send_multipart(msg, copy=False)
220220

221221
def add_request(self, request: EngineCoreRequest) -> None:
222+
# NOTE: text prompt is not needed in the core engine as it has been
223+
# tokenized.
224+
request.prompt = None
222225
self._send_input(EngineCoreRequestType.ADD, request)
223226

224227
def abort_requests(self, request_ids: List[str]) -> None:
@@ -257,6 +260,9 @@ async def _send_input(self, request_type: EngineCoreRequestType,
257260
await self.input_socket.send_multipart(msg, copy=False)
258261

259262
async def add_request_async(self, request: EngineCoreRequest) -> None:
263+
# NOTE: text prompt is not needed in the core engine as it has been
264+
# tokenized.
265+
request.prompt = None
260266
await self._send_input(EngineCoreRequestType.ADD, request)
261267

262268
async def abort_requests_async(self, request_ids: List[str]) -> None:

0 commit comments

Comments
 (0)