Skip to content

Commit 7fa2e11

Browse files
committed
[Frontend] Add MCP type support infrastructure to Responses API
Signed-off-by: Daniel Salib <danielsalib@meta.com>
1 parent 7b5575f commit 7fa2e11

File tree

3 files changed

+293
-30
lines changed

3 files changed

+293
-30
lines changed

tests/entrypoints/test_harmony_utils.py

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33

44
from openai.types.responses import ResponseFunctionToolCall, ResponseReasoningItem
5+
from openai.types.responses.response_output_item import McpCall
56
from openai_harmony import Author, Message, Role, TextContent
67

78
from vllm.entrypoints.harmony_utils import (
@@ -451,3 +452,167 @@ def test_has_custom_tools() -> None:
451452
assert has_custom_tools(
452453
{"web_search_preview", "code_interpreter", "container", "others"}
453454
)
455+
456+
457+
def test_parse_mcp_call_basic() -> None:
    """An undotted, non-function recipient is parsed into a single McpCall.

    With no dot in the recipient, the same string is expected for both
    ``name`` and ``server_label``; the message text becomes ``arguments``.
    """
    message = (
        Message.from_role_and_content(Role.ASSISTANT, '{"path": "/tmp"}')
        .with_recipient("filesystem")
        .with_channel("commentary")
    )

    output_items = parse_output_message(message)

    assert len(output_items) == 1
    call = output_items[0]
    assert isinstance(call, McpCall)
    assert call.type == "mcp_call"
    assert call.name == "filesystem"
    assert call.server_label == "filesystem"
    assert call.arguments == '{"path": "/tmp"}'
    assert call.status == "completed"
472+
473+
474+
def test_parse_mcp_call_dotted_recipient() -> None:
    """A dotted recipient is split into server label and tool name.

    For ``"repo_browser.list"`` the part before the dot is the expected
    ``server_label`` and the part after it is the expected ``name``.
    """
    message = (
        Message.from_role_and_content(Role.ASSISTANT, '{"cmd": "ls"}')
        .with_recipient("repo_browser.list")
        .with_channel("commentary")
    )

    output_items = parse_output_message(message)

    assert len(output_items) == 1
    call = output_items[0]
    assert isinstance(call, McpCall)
    assert call.name == "list"
    assert call.server_label == "repo_browser"
486+
487+
488+
def test_mcp_vs_function_call() -> None:
    """A ``functions.*`` recipient on commentary stays a function call.

    Guards against the MCP fallback swallowing ordinary function tool calls.
    """
    func_message = (
        Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
        .with_recipient("functions.my_tool")
        .with_channel("commentary")
    )

    func_items = parse_output_message(func_message)

    assert len(func_items) == 1
    item = func_items[0]
    assert not isinstance(item, McpCall)
    assert item.type == "function_call"
499+
500+
501+
def test_mcp_vs_builtin_tools() -> None:
    """The built-in ``python`` recipient is not parsed as an MCP call.

    On the commentary channel it is expected to surface as a reasoning item.
    """
    python_message = (
        Message.from_role_and_content(Role.ASSISTANT, "print('hello')")
        .with_recipient("python")
        .with_channel("commentary")
    )

    python_items = parse_output_message(python_message)

    # Exactly one item, and it must be reasoning rather than an McpCall.
    assert len(python_items) == 1
    item = python_items[0]
    assert not isinstance(item, McpCall)
    assert item.type == "reasoning"
513+
514+
515+
def test_parse_remaining_state_commentary_channel() -> None:
    """Check parse_remaining_state on the commentary channel per recipient kind.

    Three recipients are exercised: a ``functions.*`` tool, an arbitrary
    (MCP) tool, and the built-in ``python`` tool.
    """
    from unittest.mock import Mock

    from vllm.entrypoints.harmony_utils import parse_remaining_state

    def remaining_items(recipient: str, content: str):
        # A Mock stands in for the parser; only the four attributes set
        # here are configured explicitly.
        parser = Mock()
        parser.current_content = content
        parser.current_role = Role.ASSISTANT
        parser.current_channel = "commentary"
        parser.current_recipient = recipient
        return parse_remaining_state(parser)

    # Case 1: functions.* recipient -> in-progress function tool call.
    func_items = remaining_items("functions.my_tool", '{"arg": "value"}')

    assert len(func_items) == 1
    func_item = func_items[0]
    assert not isinstance(func_item, McpCall)
    assert func_item.type == "function_call"
    assert func_item.name == "my_tool"
    assert func_item.status == "in_progress"

    # Case 2: non-builtin, non-function recipient -> in-progress MCP call.
    mcp_items = remaining_items("filesystem", '{"path": "/tmp"}')

    assert len(mcp_items) == 1
    mcp_item = mcp_items[0]
    assert isinstance(mcp_item, McpCall)
    assert mcp_item.type == "mcp_call"
    assert mcp_item.name == "filesystem"
    assert mcp_item.server_label == "filesystem"
    assert mcp_item.status == "in_progress"

    # Case 3: built-in tool (python) -> not an MCP call; reasoning instead.
    builtin_items = remaining_items("python", "print('hello')")

    assert len(builtin_items) == 1
    builtin_item = builtin_items[0]
    assert not isinstance(builtin_item, McpCall)
    assert builtin_item.type == "reasoning"
566+
567+
568+
def test_parse_remaining_state_analysis_channel() -> None:
    """Check parse_remaining_state on the analysis channel per recipient kind.

    Three recipients are exercised: a ``functions.*`` tool, an arbitrary
    (MCP) tool, and the built-in ``container`` tool.
    """
    from unittest.mock import Mock

    from vllm.entrypoints.harmony_utils import parse_remaining_state

    def remaining_items(recipient: str, content: str):
        # A Mock stands in for the parser; only the four attributes set
        # here are configured explicitly.
        parser = Mock()
        parser.current_content = content
        parser.current_role = Role.ASSISTANT
        parser.current_channel = "analysis"
        parser.current_recipient = recipient
        return parse_remaining_state(parser)

    # Case 1: functions.* recipient -> in-progress function tool call.
    func_items = remaining_items("functions.my_tool", '{"arg": "value"}')

    assert len(func_items) == 1
    func_item = func_items[0]
    assert not isinstance(func_item, McpCall)
    assert func_item.type == "function_call"
    assert func_item.name == "my_tool"
    assert func_item.status == "in_progress"

    # Case 2: non-builtin, non-function recipient -> in-progress MCP call.
    mcp_items = remaining_items("database", '{"query": "test"}')

    assert len(mcp_items) == 1
    mcp_item = mcp_items[0]
    assert isinstance(mcp_item, McpCall)
    assert mcp_item.type == "mcp_call"
    assert mcp_item.name == "database"
    assert mcp_item.server_label == "database"
    assert mcp_item.status == "in_progress"

    # Case 3: built-in tool (container) -> not an MCP call; reasoning instead.
    builtin_items = remaining_items("container", "docker run")

    assert len(builtin_items) == 1
    builtin_item = builtin_items[0]
    assert not isinstance(builtin_item, McpCall)
    assert builtin_item.type == "reasoning"

vllm/entrypoints/harmony_utils.py

Lines changed: 120 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
ActionSearch,
2020
ResponseFunctionWebSearch,
2121
)
22+
from openai.types.responses.response_output_item import McpCall
2223
from openai.types.responses.response_reasoning_item import (
2324
Content as ResponseReasoningTextContent,
2425
)
@@ -155,11 +156,7 @@ def get_developer_message(
155156
"web_search_preview",
156157
"code_interpreter",
157158
"container",
158-
"mcp",
159159
):
160-
# These are built-in tools that are added to the system message.
161-
# Adding in MCP for now until we support MCP tools executed
162-
# server side
163160
pass
164161

165162
elif tool.type == "function":
@@ -427,6 +424,44 @@ def _parse_final_message(message: Message) -> ResponseOutputItem:
427424
)
428425

429426

427+
def _parse_mcp_recipient(recipient: str) -> tuple[str, str]:
    """
    Parse MCP recipient into (server_label, tool_name).

    For dotted recipients like "repo_browser.list":
        - server_label: "repo_browser" (text before the first dot)
        - tool_name: "list" (text after the last dot)

    For simple recipients like "filesystem":
        - server_label: "filesystem"
        - tool_name: "filesystem"

    With more than one dot (e.g. "a.b.c") the middle segments are dropped
    and the result is ("a", "c").

    Args:
        recipient: The recipient string of the tool-call message.

    Returns:
        A ``(server_label, tool_name)`` tuple.
    """
    # partition()/rpartition() place the whole string in the selected slot
    # when no dot is present, so the undotted case needs no separate branch
    # and the string is scanned without building intermediate lists.
    server_label = recipient.partition(".")[0]
    tool_name = recipient.rpartition(".")[2]
    return server_label, tool_name
446+
447+
448+
def _parse_mcp_call(message: Message, recipient: str) -> list[ResponseOutputItem]:
    """Convert a message addressed to an MCP tool into ``McpCall`` items.

    One item is produced per entry in ``message.content``. Each carries that
    entry's text as ``arguments``, a fresh ``mcp_``-prefixed id, status
    ``"completed"``, and the server label / tool name derived from
    ``recipient``.
    """
    server_label, tool_name = _parse_mcp_recipient(recipient)
    return [
        McpCall(
            arguments=part.text,
            type="mcp_call",
            name=tool_name,
            server_label=server_label,
            id=f"mcp_{random_uuid()}",
            status="completed",
        )
        for part in message.content
    ]
463+
464+
430465
def parse_output_message(message: Message) -> list[ResponseOutputItem]:
431466
"""
432467
Parse a Harmony message into a list of output response items.
@@ -440,18 +475,13 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]:
440475
output_items: list[ResponseOutputItem] = []
441476
recipient = message.recipient
442477

443-
# Browser tool calls
444-
if recipient is not None and recipient.startswith("browser."):
445-
output_items.append(_parse_browser_tool_call(message, recipient))
446-
447-
# Analysis channel (reasoning/chain-of-thought)
448-
elif message.channel == "analysis":
449-
output_items.extend(_parse_reasoning_content(message))
478+
if recipient is not None:
479+
# Browser tool calls
480+
if recipient.startswith("browser."):
481+
output_items.append(_parse_browser_tool_call(message, recipient))
450482

451-
# Commentary channel
452-
elif message.channel == "commentary":
453-
# Function calls
454-
if recipient is not None and recipient.startswith("functions."):
483+
# Function calls (should only happen on commentary channel)
484+
elif message.channel == "commentary" and recipient.startswith("functions."):
455485
output_items.extend(_parse_function_call(message, recipient))
456486

457487
# Built-in tools on commentary channel are treated as reasoning for now
@@ -463,10 +493,19 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]:
463493
# multiple functions - explanatory text with no recipient. Built-in tool
464494
# recipients (python/browser/container) also generate reasoning output.
465495
output_items.extend(_parse_reasoning_content(message))
496+
497+
# All other recipients are MCP calls
466498
else:
467-
raise ValueError(f"Unknown recipient: {recipient}")
499+
output_items.extend(_parse_mcp_call(message, recipient))
500+
501+
# No recipient - handle based on channel for non-tool messages
502+
elif message.channel == "analysis":
503+
output_items.extend(_parse_reasoning_content(message))
504+
505+
elif message.channel == "commentary":
506+
# Commentary channel without recipient shouldn't happen
507+
raise ValueError(f"Commentary channel message without recipient: {message}")
468508

469-
# Final output message
470509
elif message.channel == "final":
471510
output_items.append(_parse_final_message(message))
472511

@@ -485,20 +524,70 @@ def parse_remaining_state(parser: StreamableParser) -> list[ResponseOutputItem]:
485524
if current_recipient is not None and current_recipient.startswith("browser."):
486525
return []
487526

488-
if parser.current_channel == "analysis":
489-
reasoning_item = ResponseReasoningItem(
490-
id=f"rs_{random_uuid()}",
491-
summary=[],
492-
type="reasoning",
493-
content=[
494-
ResponseReasoningTextContent(
495-
text=parser.current_content, type="reasoning_text"
527+
if current_recipient and parser.current_channel in ("commentary", "analysis"):
528+
if current_recipient.startswith("functions."):
529+
rid = random_uuid()
530+
return [
531+
ResponseFunctionToolCall(
532+
arguments=parser.current_content,
533+
call_id=f"call_{rid}",
534+
type="function_call",
535+
name=current_recipient.split(".")[-1],
536+
id=f"fc_{rid}",
537+
status="in_progress",
496538
)
497-
],
498-
status=None,
499-
)
500-
return [reasoning_item]
501-
elif parser.current_channel == "final":
539+
]
540+
# Built-in tools (python, browser, container) should be treated as reasoning
541+
elif not (
542+
current_recipient.startswith("python")
543+
or current_recipient.startswith("browser")
544+
or current_recipient.startswith("container")
545+
):
546+
# All other recipients are MCP calls
547+
rid = random_uuid()
548+
server_label, tool_name = _parse_mcp_recipient(current_recipient)
549+
return [
550+
McpCall(
551+
arguments=parser.current_content,
552+
type="mcp_call",
553+
name=tool_name,
554+
server_label=server_label,
555+
id=f"mcp_{rid}",
556+
status="in_progress",
557+
)
558+
]
559+
560+
if parser.current_channel == "commentary":
561+
return [
562+
ResponseReasoningItem(
563+
id=f"rs_{random_uuid()}",
564+
summary=[],
565+
type="reasoning",
566+
content=[
567+
ResponseReasoningTextContent(
568+
text=parser.current_content, type="reasoning_text"
569+
)
570+
],
571+
status=None,
572+
)
573+
]
574+
575+
if parser.current_channel == "analysis":
576+
return [
577+
ResponseReasoningItem(
578+
id=f"rs_{random_uuid()}",
579+
summary=[],
580+
type="reasoning",
581+
content=[
582+
ResponseReasoningTextContent(
583+
text=parser.current_content, type="reasoning_text"
584+
)
585+
],
586+
status=None,
587+
)
588+
]
589+
590+
if parser.current_channel == "final":
502591
output_text = ResponseOutputText(
503592
text=parser.current_content,
504593
annotations=[], # TODO
@@ -515,6 +604,7 @@ def parse_remaining_state(parser: StreamableParser) -> list[ResponseOutputItem]:
515604
type="message",
516605
)
517606
return [text_item]
607+
518608
return []
519609

520610

0 commit comments

Comments
 (0)