Commit 76cad23
Author: Andrew Xia
Commit message: unit test
Signed-off-by: Andrew Xia <axia@fb.com>
Parent: fb48da8

3 files changed (+40, -33 lines)

tests/entrypoints/openai/test_response_api_parsable_context.py (6 additions, 0 deletions)

@@ -165,6 +165,7 @@ async def test_mcp_tool_call(client: OpenAI, model_name: str):
         model=model_name,
         input="What is 13 * 24? Use python to calculate the result.",
         tools=[{"type": "code_interpreter", "container": {"type": "auto"}}],
+        extra_body={"enable_response_messages": True},
         temperature=0.0,
     )

@@ -178,3 +179,8 @@ async def test_mcp_tool_call(client: OpenAI, model_name: str):
     # make sure the correct math is in the final output
     assert response.output[3].type == "message"
     assert "312" in response.output[3].content[0].text
+
+    # test raw input_messages / output_messages
+    assert len(response.input_messages) == 1
+    assert len(response.output_messages) == 3
+    assert "312" in response.output_messages[2]["message"]

vllm/entrypoints/context.py (34 additions, 31 deletions)

@@ -182,21 +182,23 @@ def append_output(self, output) -> None:
         self.num_cached_tokens = output.num_cached_tokens or 0
         self.num_output_tokens += len(output.outputs[0].token_ids or [])

-        if len(self.input_messages) == 0:
-            output_prompt = output.prompt or ""
-            output_prompt_token_ids = output.prompt_token_ids or []
-            self.input_messages.append(
+        # only store if enable_response_messages is True, save memory
+        if self.request.enable_response_messages:
+            if len(self.input_messages) == 0:
+                output_prompt = output.prompt or ""
+                output_prompt_token_ids = output.prompt_token_ids or []
+                self.input_messages.append(
+                    ResponseRawMessageAndToken(
+                        message=output_prompt,
+                        tokens=output_prompt_token_ids,
+                    )
+                )
+            self.output_messages.append(
                 ResponseRawMessageAndToken(
-                    message=output_prompt,
-                    tokens=output_prompt_token_ids,
+                    message=output.outputs[0].text,
+                    tokens=output.outputs[0].token_ids,
                 )
             )
-        self.output_messages.append(
-            ResponseRawMessageAndToken(
-                message=output.outputs[0].text,
-                tokens=output.outputs[0].token_ids,
-            )
-        )

     def append_tool_output(self, output) -> None:
         raise NotImplementedError("Should not be called.")

@@ -274,30 +276,31 @@ def append_output(self, output: RequestOutput) -> None:
         self.num_cached_tokens = output.num_cached_tokens or 0
         self.num_output_tokens += len(output.outputs[0].token_ids or [])
         self.parser.process(output.outputs[0])
-        output_prompt = output.prompt or ""
-        output_prompt_token_ids = output.prompt_token_ids or []
-        if len(self.input_messages) == 0:
-            self.input_messages.append(
-                ResponseRawMessageAndToken(
-                    message=output_prompt,
-                    tokens=output_prompt_token_ids,
+
+        # only store if enable_response_messages is True, save memory
+        if self.request.enable_response_messages:
+            output_prompt = output.prompt or ""
+            output_prompt_token_ids = output.prompt_token_ids or []
+            if len(self.input_messages) == 0:
+                self.input_messages.append(
+                    ResponseRawMessageAndToken(
+                        message=output_prompt,
+                        tokens=output_prompt_token_ids,
+                    )
+                )
+            else:
+                self.output_messages.append(
+                    ResponseRawMessageAndToken(
+                        message=output_prompt,
+                        tokens=output_prompt_token_ids,
+                    )
                 )
-            )
-        else:
-            # TODO: merge them in properly together
-            # TODO: responsesParser doesn't parse kimi k2 sentences correctly
             self.output_messages.append(
                 ResponseRawMessageAndToken(
-                    message=output_prompt,
-                    tokens=output_prompt_token_ids,
+                    message=output.outputs[0].text,
+                    tokens=output.outputs[0].token_ids,
                 )
             )
-        self.output_messages.append(
-            ResponseRawMessageAndToken(
-                message=output.outputs[0].text,
-                tokens=output.outputs[0].token_ids,
-            )
-        )

     def append_tool_output(self, output: list[ResponseInputOutputItem]) -> None:
         self.parser.response_messages.extend(output)
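The change in both append_output implementations is the same: the raw prompt/completion bookkeeping is now gated behind request.enable_response_messages, so requests that never asked for raw messages do not accumulate them. Below is a condensed sketch of the gated behavior; the field types of ResponseRawMessageAndToken are inferred from its call sites above, and SimpleContext is invented for illustration.

# Condensed sketch only; ResponseRawMessageAndToken's field types are
# assumed from its call sites, and SimpleContext is hypothetical.
from dataclasses import dataclass, field


@dataclass
class ResponseRawMessageAndToken:
    message: str        # raw prompt or completion text
    tokens: list[int]   # token ids corresponding to `message`


@dataclass
class SimpleContext:
    enable_response_messages: bool
    input_messages: list[ResponseRawMessageAndToken] = field(default_factory=list)
    output_messages: list[ResponseRawMessageAndToken] = field(default_factory=list)

    def append_output(self, prompt: str, prompt_ids: list[int],
                      text: str, token_ids: list[int]) -> None:
        # Skip all raw-message bookkeeping unless the client opted in,
        # so untracked requests pay no extra memory cost.
        if not self.enable_response_messages:
            return
        # The prompt is recorded once, on the first engine output.
        if not self.input_messages:
            self.input_messages.append(
                ResponseRawMessageAndToken(message=prompt, tokens=prompt_ids)
            )
        # Every engine output contributes one completion entry.
        self.output_messages.append(
            ResponseRawMessageAndToken(message=text, tokens=token_ids)
        )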

vllm/entrypoints/openai/serving_responses.py (0 additions, 2 deletions)

@@ -318,8 +318,6 @@ async def create_responses(
         if maybe_validation_error is not None:
             return maybe_validation_error

-        fbvscode.set_trace()
-
         # If the engine is dead, raise the engine's DEAD_ERROR.
         # This is required for the streaming case, where we return a
         # success status before we actually start generating text :).
