Skip to content

Commit bd2d381

Browse files
committed
[Frontend] Add MCP type support infrastructure to Responses API
Signed-off-by: Daniel Salib <danielsalib@meta.com>
1 parent 17a9abe commit bd2d381

File tree

4 files changed

+414
-47
lines changed

4 files changed

+414
-47
lines changed

RECIPES_UPDATE.md

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
# MCP Tools Update for vLLM Recipes - GPT-OSS.md
2+
3+
This content should be added to: https://github.com/vllm-project/recipes/blob/main/OpenAI/GPT-OSS.md
4+
5+
---
6+
7+
## Add this new section after the "Tool Calling" section:
8+
9+
### MCP Tool Calling
10+
11+
vLLM supports MCP (Model Context Protocol) tools through the Responses API, allowing models to call external services and execute code.
12+
13+
#### Setup
14+
15+
1. **Set environment variables**:
16+
```bash
17+
export VLLM_ENABLE_RESPONSES_API_STORE=1
18+
export VLLM_GPT_OSS_SYSTEM_TOOL_MCP_LABELS=code_interpreter
19+
export VLLM_RESPONSES_API_USE_MCP_TYPES=1
20+
export VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS=1
21+
```
22+
23+
2. **Start vLLM server**:
24+
```bash
25+
vllm serve openai/gpt-oss-20b \
26+
--tool-server demo \
27+
--enable-auto-tool-choice \
28+
--tool-call-parser openai
29+
```
30+
31+
#### Built-in MCP Tools
32+
33+
| Tool | Description | Server Label |
34+
|------|-------------|--------------|
35+
| Python Execution | Execute Python code | `code_interpreter` |
36+
| Container | Run commands in containers | `container` |
37+
| Web Search | Search and retrieve web content | `web_search_preview` |
38+
39+
#### Example Usage
40+
41+
**Basic MCP Tool Call:**
42+
```python
43+
from openai import OpenAI
44+
45+
client = OpenAI(base_url="http://localhost:8000/v1", api_key="empty")
46+
47+
response = client.responses.create(
48+
model="openai/gpt-oss-20b",
49+
input="Calculate 123 * 456 using Python",
50+
tools=[{"type": "mcp", "server_label": "code_interpreter"}],
51+
instructions="You must use the Python tool. Never simulate execution.",
52+
)
53+
54+
print(response.output_text)
55+
```
56+
57+
**Streaming MCP Events:**
58+
```python
59+
stream = client.responses.create(
60+
model="openai/gpt-oss-20b",
61+
input="What is 100 / 4? Use Python.",
62+
tools=[{"type": "mcp", "server_label": "code_interpreter"}],
63+
stream=True,
64+
)
65+
66+
for event in stream:
67+
if "mcp_call" in event.type:
68+
print(f"Event: {event.type}")
69+
# Events: response.mcp_call.in_progress
70+
# response.mcp_call_arguments.delta
71+
# response.mcp_call_arguments.done
72+
# response.mcp_call.completed
73+
```
74+
75+
**Filter Specific Tools:**
76+
```python
77+
response = client.responses.create(
78+
model="openai/gpt-oss-20b",
79+
input="Add 10 and 20",
80+
tools=[{
81+
"type": "mcp",
82+
"server_label": "calculator",
83+
"allowed_tools": ["add"] # Only allow specific tools
84+
}],
85+
)
86+
```
87+
88+
#### Environment Variables Reference
89+
90+
| Variable | Description | Default |
91+
|----------|-------------|---------|
92+
| `VLLM_ENABLE_RESPONSES_API_STORE` | Enable response storage | `0` |
93+
| `VLLM_GPT_OSS_SYSTEM_TOOL_MCP_LABELS` | Comma-separated MCP server labels | `""` |
94+
| `VLLM_RESPONSES_API_USE_MCP_TYPES` | Enable MCP type system | `0` |
95+
| `VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS` | Enable harmony instructions | `0` |
96+
97+
#### Example Code
98+
99+
See `examples/online_serving/openai_responses_client_with_mcp_tools.py` for a complete working example.
100+
101+
---
102+
103+
## Add this note to the existing "Tool Calling" section:
104+
105+
> **Note**: For MCP (Model Context Protocol) tools that call external services, see the [MCP Tool Calling](#mcp-tool-calling) section below.

tests/entrypoints/test_harmony_utils.py

Lines changed: 176 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33

44
from openai.types.responses import ResponseFunctionToolCall, ResponseReasoningItem
5+
from openai.types.responses.response_output_item import McpCall
56
from openai_harmony import Author, Message, Role, TextContent
67

78
from vllm.entrypoints.harmony_utils import (
@@ -400,17 +401,19 @@ def test_commentary_with_multiple_function_calls(self):
400401
assert output_items[0].arguments == '{"location": "San Francisco"}'
401402
assert output_items[1].arguments == '{"location": "New York"}'
402403

403-
def test_commentary_with_unknown_recipient_raises_error(self):
404-
"""Test that commentary with unknown recipient raises ValueError."""
405-
message = Message.from_role_and_content(Role.ASSISTANT, "some content")
404+
def test_commentary_with_unknown_recipient_creates_mcp_call(self):
405+
"""Test that commentary with unknown recipient creates MCP call."""
406+
message = Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
406407
message = message.with_channel("commentary")
407-
message = message.with_recipient("unknown_recipient")
408+
message = message.with_recipient("custom_tool")
408409

409-
try:
410-
parse_output_message(message)
411-
raise AssertionError("Expected ValueError to be raised")
412-
except ValueError as e:
413-
assert "Unknown recipient: unknown_recipient" in str(e)
410+
output_items = parse_output_message(message)
411+
412+
assert len(output_items) == 1
413+
assert isinstance(output_items[0], McpCall)
414+
assert output_items[0].type == "mcp_call"
415+
assert output_items[0].name == "custom_tool"
416+
assert output_items[0].server_label == "custom_tool"
414417

415418
def test_analysis_channel_creates_reasoning(self):
416419
"""Test that analysis channel creates reasoning items."""
@@ -451,3 +454,167 @@ def test_has_custom_tools() -> None:
451454
assert has_custom_tools(
452455
{"web_search_preview", "code_interpreter", "container", "others"}
453456
)
457+
458+
459+
def test_parse_mcp_call_basic() -> None:
    """A bare, non-builtin commentary recipient is parsed as an MCP call.

    The recipient has no dot, so the same string is expected as both the
    tool name and the server label. The raw message content is expected
    as the call arguments, and the item status is expected to be
    ``completed``.
    """
    msg = (
        Message.from_role_and_content(Role.ASSISTANT, '{"path": "/tmp"}')
        .with_recipient("filesystem")
        .with_channel("commentary")
    )

    parsed = parse_output_message(msg)

    assert len(parsed) == 1
    call = parsed[0]
    assert isinstance(call, McpCall)
    assert call.type == "mcp_call"
    assert call.name == "filesystem"
    assert call.server_label == "filesystem"
    assert call.arguments == '{"path": "/tmp"}'
    assert call.status == "completed"
474+
475+
476+
def test_parse_mcp_call_dotted_recipient() -> None:
    """A ``server.tool`` recipient is split into server label and tool name.

    For ``repo_browser.list`` the part before the dot is expected as the
    server label and the part after it as the tool name.
    """
    msg = (
        Message.from_role_and_content(Role.ASSISTANT, '{"cmd": "ls"}')
        .with_recipient("repo_browser.list")
        .with_channel("commentary")
    )

    parsed = parse_output_message(msg)

    assert len(parsed) == 1
    call = parsed[0]
    assert isinstance(call, McpCall)
    assert call.name == "list"
    assert call.server_label == "repo_browser"
488+
489+
490+
def test_mcp_vs_function_call() -> None:
    """A ``functions.*`` recipient stays a function call, not an MCP call."""
    msg = (
        Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
        .with_recipient("functions.my_tool")
        .with_channel("commentary")
    )

    parsed = parse_output_message(msg)

    assert len(parsed) == 1
    item = parsed[0]
    assert not isinstance(item, McpCall)
    assert item.type == "function_call"
501+
502+
503+
def test_mcp_vs_builtin_tools() -> None:
    """The built-in ``python`` recipient is not parsed as an MCP call.

    Only the ``python`` recipient on the commentary channel is exercised
    here; the resulting item is expected to be a reasoning item.
    """
    # python is a built-in tool, so its output should surface as reasoning.
    msg = (
        Message.from_role_and_content(Role.ASSISTANT, "print('hello')")
        .with_recipient("python")
        .with_channel("commentary")
    )

    parsed = parse_output_message(msg)

    assert len(parsed) == 1
    item = parsed[0]
    assert not isinstance(item, McpCall)
    assert item.type == "reasoning"
515+
516+
517+
def test_parse_remaining_state_commentary_channel() -> None:
    """Check parse_remaining_state for in-flight commentary-channel output.

    Three recipients are run through a mocked parser: a ``functions.*``
    recipient, a non-builtin (MCP) recipient, and the built-in ``python``
    recipient.
    """
    from unittest.mock import Mock

    from vllm.entrypoints.harmony_utils import parse_remaining_state

    def pending_parser(content: str, recipient: str) -> Mock:
        # Mock parser pre-populated with the in-flight message state.
        return Mock(
            current_content=content,
            current_role=Role.ASSISTANT,
            current_channel="commentary",
            current_recipient=recipient,
        )

    # Case 1: functions.* recipient -> function tool call, still in progress.
    func_items = parse_remaining_state(
        pending_parser('{"arg": "value"}', "functions.my_tool")
    )

    assert len(func_items) == 1
    func_item = func_items[0]
    assert not isinstance(func_item, McpCall)
    assert func_item.type == "function_call"
    assert func_item.name == "my_tool"
    assert func_item.status == "in_progress"

    # Case 2: non-builtin recipient -> MCP call, still in progress.
    mcp_items = parse_remaining_state(
        pending_parser('{"path": "/tmp"}', "filesystem")
    )

    assert len(mcp_items) == 1
    mcp_item = mcp_items[0]
    assert isinstance(mcp_item, McpCall)
    assert mcp_item.type == "mcp_call"
    assert mcp_item.name == "filesystem"
    assert mcp_item.server_label == "filesystem"
    assert mcp_item.status == "in_progress"

    # Case 3: built-in tool (python) -> not an MCP call; reasoning instead.
    builtin_items = parse_remaining_state(
        pending_parser("print('hello')", "python")
    )

    assert len(builtin_items) == 1
    builtin_item = builtin_items[0]
    assert not isinstance(builtin_item, McpCall)
    assert builtin_item.type == "reasoning"
568+
569+
570+
def test_parse_remaining_state_analysis_channel() -> None:
    """Check parse_remaining_state for in-flight analysis-channel output.

    Three recipients are run through a mocked parser: a ``functions.*``
    recipient, a non-builtin (MCP) recipient, and the built-in
    ``container`` recipient.
    """
    from unittest.mock import Mock

    from vllm.entrypoints.harmony_utils import parse_remaining_state

    def pending_parser(content: str, recipient: str) -> Mock:
        # Mock parser pre-populated with the in-flight message state.
        return Mock(
            current_content=content,
            current_role=Role.ASSISTANT,
            current_channel="analysis",
            current_recipient=recipient,
        )

    # Case 1: functions.* recipient -> function tool call, still in progress.
    func_items = parse_remaining_state(
        pending_parser('{"arg": "value"}', "functions.my_tool")
    )

    assert len(func_items) == 1
    func_item = func_items[0]
    assert not isinstance(func_item, McpCall)
    assert func_item.type == "function_call"
    assert func_item.name == "my_tool"
    assert func_item.status == "in_progress"

    # Case 2: non-builtin recipient -> MCP call, still in progress.
    mcp_items = parse_remaining_state(
        pending_parser('{"query": "test"}', "database")
    )

    assert len(mcp_items) == 1
    mcp_item = mcp_items[0]
    assert isinstance(mcp_item, McpCall)
    assert mcp_item.type == "mcp_call"
    assert mcp_item.name == "database"
    assert mcp_item.server_label == "database"
    assert mcp_item.status == "in_progress"

    # Case 3: built-in tool (container) -> not an MCP call; reasoning instead.
    builtin_items = parse_remaining_state(
        pending_parser("docker run", "container")
    )

    assert len(builtin_items) == 1
    builtin_item = builtin_items[0]
    assert not isinstance(builtin_item, McpCall)
    assert builtin_item.type == "reasoning"

0 commit comments

Comments
 (0)