Skip to content

Commit b6bbde0

Browse files
committed
[Frontend] Add MCP type support infrastructure to Responses API
Signed-off-by: Daniel Salib <danielsalib@meta.com>
1 parent 899e2ef commit b6bbde0

File tree

3 files changed

+294
-32
lines changed

3 files changed

+294
-32
lines changed

tests/entrypoints/test_harmony_utils.py

Lines changed: 165 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33

4-
from openai_harmony import Role
4+
from openai.types.responses.response_output_item import McpCall
5+
from openai_harmony import Message, Role
56

67
from vllm.entrypoints.harmony_utils import (
78
has_custom_tools,
89
parse_input_to_harmony_message,
10+
parse_output_message,
911
)
1012

1113

@@ -264,3 +266,165 @@ def test_has_custom_tools() -> None:
264266
assert has_custom_tools(
265267
{"web_search_preview", "code_interpreter", "container", "others"}
266268
)
269+
270+
271+
def test_parse_mcp_call_basic() -> None:
    """A plain (undotted) recipient on the commentary channel yields one MCP call.

    The recipient string is expected to serve as both the tool name and the
    server label, and the message text is passed through as the arguments.
    """
    harmony_msg = (
        Message.from_role_and_content(Role.ASSISTANT, '{"path": "/tmp"}')
        .with_recipient("filesystem")
        .with_channel("commentary")
    )

    parsed = parse_output_message(harmony_msg)

    assert len(parsed) == 1
    call = parsed[0]
    assert isinstance(call, McpCall)
    assert call.type == "mcp_call"
    assert call.name == "filesystem"
    assert call.server_label == "filesystem"
    assert call.arguments == '{"path": "/tmp"}'
    assert call.status == "completed"
286+
287+
288+
def test_parse_mcp_call_dotted_recipient() -> None:
    """A ``server.tool`` recipient is split into server label and tool name."""
    harmony_msg = (
        Message.from_role_and_content(Role.ASSISTANT, '{"cmd": "ls"}')
        .with_recipient("repo_browser.list")
        .with_channel("commentary")
    )

    parsed = parse_output_message(harmony_msg)

    assert len(parsed) == 1
    call = parsed[0]
    assert isinstance(call, McpCall)
    # The part after the dot is the tool; the part before it is the server.
    assert call.name == "list"
    assert call.server_label == "repo_browser"
300+
301+
302+
def test_mcp_vs_function_call() -> None:
    """A ``functions.*`` recipient must stay a function call, not an MCP call."""
    harmony_msg = (
        Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
        .with_recipient("functions.my_tool")
        .with_channel("commentary")
    )

    parsed = parse_output_message(harmony_msg)

    assert len(parsed) == 1
    item = parsed[0]
    assert not isinstance(item, McpCall)
    assert item.type == "function_call"
313+
314+
315+
def test_mcp_vs_builtin_tools() -> None:
    """Test that built-in tools (python, container) are not parsed as MCP calls."""
    # Every built-in recipient named in the docstring is exercised: each one
    # should be reported as a reasoning item, never as an MCP call.
    builtin_cases = (
        ("python", "print('hello')"),
        ("container", "docker run"),
    )
    for recipient, content in builtin_cases:
        message = Message.from_role_and_content(Role.ASSISTANT, content)
        message = message.with_recipient(recipient)
        message = message.with_channel("commentary")

        items = parse_output_message(message)

        # The recipient is attached as the assertion message so a failure
        # identifies which built-in tool regressed.
        assert len(items) == 1, recipient
        assert not isinstance(items[0], McpCall), recipient
        assert items[0].type == "reasoning", recipient
327+
328+
329+
def test_parse_remaining_state_commentary_channel() -> None:
    """Exercise parse_remaining_state for in-flight commentary-channel content.

    Three recipient kinds are covered: a ``functions.*`` call, an arbitrary
    (MCP) recipient, and a built-in tool.
    """
    from unittest.mock import Mock

    from vllm.entrypoints.harmony_utils import parse_remaining_state

    def fake_parser(recipient: str, content: str) -> Mock:
        # Stand-in parser carrying the four attributes set for each scenario.
        return Mock(
            current_content=content,
            current_role=Role.ASSISTANT,
            current_channel="commentary",
            current_recipient=recipient,
        )

    # Scenario 1: functions.* recipient -> unfinished function tool call.
    function_result = parse_remaining_state(
        fake_parser("functions.my_tool", '{"arg": "value"}')
    )
    assert len(function_result) == 1
    function_item = function_result[0]
    assert not isinstance(function_item, McpCall)
    assert function_item.type == "function_call"
    assert function_item.name == "my_tool"
    assert function_item.status == "in_progress"

    # Scenario 2: non-built-in recipient -> unfinished MCP call.
    mcp_result = parse_remaining_state(fake_parser("filesystem", '{"path": "/tmp"}'))
    assert len(mcp_result) == 1
    mcp_item = mcp_result[0]
    assert isinstance(mcp_item, McpCall)
    assert mcp_item.type == "mcp_call"
    assert mcp_item.name == "filesystem"
    assert mcp_item.server_label == "filesystem"
    assert mcp_item.status == "in_progress"

    # Scenario 3: built-in tool (python) -> not an MCP call; reported as reasoning.
    builtin_result = parse_remaining_state(fake_parser("python", "print('hello')"))
    assert len(builtin_result) == 1
    builtin_item = builtin_result[0]
    assert not isinstance(builtin_item, McpCall)
    assert builtin_item.type == "reasoning"
379+
380+
381+
def test_parse_remaining_state_analysis_channel() -> None:
    """Exercise parse_remaining_state for in-flight analysis-channel content.

    Three recipient kinds are covered: a ``functions.*`` call, an arbitrary
    (MCP) recipient, and a built-in tool.
    """
    from unittest.mock import Mock

    from vllm.entrypoints.harmony_utils import parse_remaining_state

    def fake_parser(recipient: str, content: str) -> Mock:
        # Stand-in parser carrying the four attributes set for each scenario.
        return Mock(
            current_content=content,
            current_role=Role.ASSISTANT,
            current_channel="analysis",
            current_recipient=recipient,
        )

    # Scenario 1: functions.* recipient -> unfinished function tool call.
    function_result = parse_remaining_state(
        fake_parser("functions.my_tool", '{"arg": "value"}')
    )
    assert len(function_result) == 1
    function_item = function_result[0]
    assert not isinstance(function_item, McpCall)
    assert function_item.type == "function_call"
    assert function_item.name == "my_tool"
    assert function_item.status == "in_progress"

    # Scenario 2: non-built-in recipient -> unfinished MCP call.
    mcp_result = parse_remaining_state(fake_parser("database", '{"query": "test"}'))
    assert len(mcp_result) == 1
    mcp_item = mcp_result[0]
    assert isinstance(mcp_item, McpCall)
    assert mcp_item.type == "mcp_call"
    assert mcp_item.name == "database"
    assert mcp_item.server_label == "database"
    assert mcp_item.status == "in_progress"

    # Scenario 3: built-in tool (container) -> not an MCP call; reported as
    # reasoning.
    builtin_result = parse_remaining_state(fake_parser("container", "docker run"))
    assert len(builtin_result) == 1
    builtin_item = builtin_result[0]
    assert not isinstance(builtin_item, McpCall)
    assert builtin_item.type == "reasoning"

vllm/entrypoints/harmony_utils.py

Lines changed: 121 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
ActionSearch,
2020
ResponseFunctionWebSearch,
2121
)
22+
from openai.types.responses.response_output_item import McpCall
2223
from openai.types.responses.response_reasoning_item import (
2324
Content as ResponseReasoningTextContent,
2425
)
@@ -155,11 +156,7 @@ def get_developer_message(
155156
"web_search_preview",
156157
"code_interpreter",
157158
"container",
158-
"mcp",
159159
):
160-
# These are built-in tools that are added to the system message.
161-
# Adding in MCP for now until we support MCP tools executed
162-
# server side
163160
pass
164161

165162
elif tool.type == "function":
@@ -427,6 +424,44 @@ def _parse_final_message(message: Message) -> ResponseOutputItem:
427424
)
428425

429426

427+
def _parse_mcp_recipient(recipient: str) -> tuple[str, str]:
428+
"""
429+
Parse MCP recipient into (server_label, tool_name).
430+
431+
For dotted recipients like "repo_browser.list":
432+
- server_label: "repo_browser" (namespace/server)
433+
- tool_name: "list" (specific tool)
434+
435+
For simple recipients like "filesystem":
436+
- server_label: "filesystem"
437+
- tool_name: "filesystem"
438+
"""
439+
if "." in recipient:
440+
server_label = recipient.split(".")[0]
441+
tool_name = recipient.split(".")[-1]
442+
else:
443+
server_label = recipient
444+
tool_name = recipient
445+
return server_label, tool_name
446+
447+
448+
def _parse_mcp_call(message: Message, recipient: str) -> list[ResponseOutputItem]:
    """Convert a Harmony message addressed to an MCP tool into MCP call items.

    One ``McpCall`` is built per entry in ``message.content``, using that
    entry's text as the call arguments. The server label and tool name come
    from ``_parse_mcp_recipient(recipient)``; each item gets a fresh
    ``mcp_``-prefixed id and is marked "completed".
    """
    label, tool = _parse_mcp_recipient(recipient)
    return [
        McpCall(
            id=f"mcp_{random_uuid()}",
            type="mcp_call",
            server_label=label,
            name=tool,
            arguments=part.text,
            status="completed",
        )
        for part in message.content
    ]
463+
464+
430465
def parse_output_message(message: Message) -> list[ResponseOutputItem]:
431466
"""
432467
Parse a Harmony message into a list of output response items.
@@ -440,31 +475,35 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]:
440475
output_items: list[ResponseOutputItem] = []
441476
recipient = message.recipient
442477

443-
# Browser tool calls
444-
if recipient is not None and recipient.startswith("browser."):
445-
output_items.append(_parse_browser_tool_call(message, recipient))
446-
447-
# Analysis channel (reasoning/chain-of-thought)
448-
elif message.channel == "analysis":
449-
output_items.extend(_parse_reasoning_content(message))
478+
if recipient is not None:
479+
# Browser tool calls
480+
if recipient.startswith("browser."):
481+
output_items.append(_parse_browser_tool_call(message, recipient))
450482

451-
# Commentary channel
452-
elif message.channel == "commentary":
453-
# Function calls
454-
if recipient is not None and recipient.startswith("functions."):
483+
# Function calls (should only happen on commentary channel)
484+
elif message.channel == "commentary" and recipient.startswith("functions."):
455485
output_items.extend(_parse_function_call(message, recipient))
456486

457487
# Built-in tools on commentary channel are treated as reasoning for now
458-
elif recipient is not None and (
488+
elif (
459489
recipient.startswith("python")
460490
or recipient.startswith("browser")
461491
or recipient.startswith("container")
462492
):
463493
output_items.extend(_parse_reasoning_content(message))
494+
495+
# All other recipients are MCP calls
464496
else:
465-
raise ValueError(f"Unknown recipient: {recipient}")
497+
output_items.extend(_parse_mcp_call(message, recipient))
498+
499+
# No recipient - handle based on channel for non-tool messages
500+
elif message.channel == "analysis":
501+
output_items.extend(_parse_reasoning_content(message))
502+
503+
elif message.channel == "commentary":
504+
# Commentary channel without recipient shouldn't happen
505+
raise ValueError(f"Commentary channel message without recipient: {message}")
466506

467-
# Final output message
468507
elif message.channel == "final":
469508
output_items.append(_parse_final_message(message))
470509

@@ -483,20 +522,70 @@ def parse_remaining_state(parser: StreamableParser) -> list[ResponseOutputItem]:
483522
if current_recipient is not None and current_recipient.startswith("browser."):
484523
return []
485524

486-
if parser.current_channel == "analysis":
487-
reasoning_item = ResponseReasoningItem(
488-
id=f"rs_{random_uuid()}",
489-
summary=[],
490-
type="reasoning",
491-
content=[
492-
ResponseReasoningTextContent(
493-
text=parser.current_content, type="reasoning_text"
525+
if current_recipient and parser.current_channel in ("commentary", "analysis"):
526+
if current_recipient.startswith("functions."):
527+
rid = random_uuid()
528+
return [
529+
ResponseFunctionToolCall(
530+
arguments=parser.current_content,
531+
call_id=f"call_{rid}",
532+
type="function_call",
533+
name=current_recipient.split(".")[-1],
534+
id=f"fc_{rid}",
535+
status="in_progress",
494536
)
495-
],
496-
status=None,
497-
)
498-
return [reasoning_item]
499-
elif parser.current_channel == "final":
537+
]
538+
# Built-in tools (python, browser, container) should be treated as reasoning
539+
elif not (
540+
current_recipient.startswith("python")
541+
or current_recipient.startswith("browser")
542+
or current_recipient.startswith("container")
543+
):
544+
# All other recipients are MCP calls
545+
rid = random_uuid()
546+
server_label, tool_name = _parse_mcp_recipient(current_recipient)
547+
return [
548+
McpCall(
549+
arguments=parser.current_content,
550+
type="mcp_call",
551+
name=tool_name,
552+
server_label=server_label,
553+
id=f"mcp_{rid}",
554+
status="in_progress",
555+
)
556+
]
557+
558+
if parser.current_channel == "commentary":
559+
return [
560+
ResponseReasoningItem(
561+
id=f"rs_{random_uuid()}",
562+
summary=[],
563+
type="reasoning",
564+
content=[
565+
ResponseReasoningTextContent(
566+
text=parser.current_content, type="reasoning_text"
567+
)
568+
],
569+
status=None,
570+
)
571+
]
572+
573+
if parser.current_channel == "analysis":
574+
return [
575+
ResponseReasoningItem(
576+
id=f"rs_{random_uuid()}",
577+
summary=[],
578+
type="reasoning",
579+
content=[
580+
ResponseReasoningTextContent(
581+
text=parser.current_content, type="reasoning_text"
582+
)
583+
],
584+
status=None,
585+
)
586+
]
587+
588+
if parser.current_channel == "final":
500589
output_text = ResponseOutputText(
501590
text=parser.current_content,
502591
annotations=[], # TODO
@@ -513,6 +602,7 @@ def parse_remaining_state(parser: StreamableParser) -> list[ResponseOutputItem]:
513602
type="message",
514603
)
515604
return [text_item]
605+
516606
return []
517607

518608

0 commit comments

Comments
 (0)