-
Notifications
You must be signed in to change notification settings - Fork 3.1k
Description
Initial Checks
- I confirm that I'm using the latest version of MCP Python SDK
- I confirm that I searched for my issue in https://github.com/modelcontextprotocol/python-sdk/issues before opening this issue
Description
Summary
The MCP Python SDK raises asyncio.CancelledError when a server connection fails. This is structurally identical to external task cancellation (Ctrl+C, SIGTERM), making it impossible for client code to correctly handle both scenarios.
Problem
When an MCP server becomes unreachable during a request:
try:
    result = await session.list_tools()
except asyncio.CancelledError:
    # Is this:
    #   A) Server died (should reconnect/retry)
    #   B) Operator hit Ctrl+C (should propagate for clean shutdown)
    #
    # Cannot distinguish.
Root Cause
The SDK uses anyio for structured concurrency. Transport layers (mcp/client/sse.py, mcp/client/streamable_http.py) create task groups:
# mcp/client/sse.py (simplified)
async with anyio.create_task_group() as tg:
    tg.start_soon(sse_reader)   # Reads from server
    tg.start_soon(post_writer)  # Writes to server
    yield read_stream, write_stream
When the server connection fails:
- The sse_reader task fails (connection lost)
- anyio's task group cancels sibling tasks
- CancelledError propagates to response_stream_reader.receive() in session.py
- Client code catches CancelledError
This is the same exception type raised by task.cancel() during external shutdown.
Evidence
Exception characteristics when catching CancelledError:
| Scenario | ex.args | task.cancelling() delta |
|---|---|---|
| SSE server dies | () | +1 |
| Streaming HTTP server dies | ('Cancelled by cancel scope...',) | +1 |
| External task.cancel() | () | +1 |
SSE internal failure and external cancellation have identical characteristics.
Impact
If client converts CancelledError → ConnectionError:
- External shutdown (Ctrl+C) raises ConnectionError instead of CancelledError
- Retry loops may continue instead of exiting
- asyncio's cooperative cancellation model is broken
If client propagates CancelledError:
- Server failures escape as BaseException
- Callers must use except BaseException to handle failures
- Poor error messages ("CancelledError" vs "connection lost")
Files Involved
| File | Role |
|---|---|
| mcp/client/sse.py | SSE transport - creates task group |
| mcp/client/streamable_http.py | Streaming HTTP transport - creates task group |
| mcp/shared/session.py | response_stream_reader.receive() - where CancelledError surfaces |
Example Code
#!/usr/bin/env python3
"""
Minimal reproduction: CancelledError ambiguity in MCP SDK.
This script demonstrates that when an MCP server dies mid-request,
the client receives asyncio.CancelledError - the same exception type
raised by external task cancellation (Ctrl+C, SIGTERM).
Run: python repro_cancelled_error_ambiguity.py
Expected output:
- Test 1 (server dies): CancelledError
- Test 2 (external cancel): CancelledError
Both scenarios produce identical exceptions, making it impossible
for client code to distinguish server failure from intentional shutdown.
"""
import asyncio
import multiprocessing
import socket
import time
from typing import Any
import uvicorn
from starlette.applications import Starlette
from starlette.requests import Request
from starlette.responses import Response
from starlette.routing import Mount, Route
from mcp.client.session import ClientSession
from mcp.client.sse import sse_client
from mcp.server import Server
from mcp.server.sse import SseServerTransport
from mcp.server.transport_security import TransportSecuritySettings
from mcp.types import TextContent, Tool
# === Minimal MCP Server ===
class SlowToolServer(Server):
    """MCP server for the reproduction that exposes one deliberately slow tool.

    The tool handler sleeps for 10 seconds so that a client request is
    reliably still in flight when the test kills the server process or
    cancels the client task.
    """

    def __init__(self) -> None:
        """Create the server named ``"test-server"`` and attach its handlers."""
        super().__init__("test-server")

        # Both handlers are defined inside __init__ because the decorators
        # are obtained from this instance (`self.list_tools()` /
        # `self.call_tool()`).

        @self.list_tools()
        async def handle_list_tools() -> list[Tool]:
            """Advertise the single ``slow_tool`` tool, which takes no arguments."""
            return [
                Tool(
                    name="slow_tool",
                    description="Takes 10 seconds",
                    inputSchema={"type": "object", "properties": {}},
                )
            ]

        @self.call_tool()
        async def handle_call_tool(name: str, args: dict[str, Any]) -> list[TextContent]:
            """Sleep for 10 seconds, then return a fixed ``"Done"`` text result.

            ``name`` and ``args`` are ignored: every call behaves the same.
            """
            await asyncio.sleep(10.0)
            return [TextContent(type="text", text="Done")]
def run_server(port: int) -> None:
    """Serve a ``SlowToolServer`` over SSE on ``127.0.0.1:<port>`` until stopped.

    Intended as the target of a ``multiprocessing.Process``: the call blocks
    inside ``uvicorn.Server.run()``.

    Args:
        port: TCP port on the loopback interface to listen on.
    """
    security = TransportSecuritySettings(
        allowed_hosts=["127.0.0.1:*"],
        allowed_origins=["http://127.0.0.1:*"],
    )
    sse = SseServerTransport("/messages/", security_settings=security)
    server = SlowToolServer()

    async def handle_sse(request: Request) -> Response:
        """Run one MCP server session over the SSE connection for ``request``."""
        async with sse.connect_sse(request.scope, request.receive, request._send) as streams:
            await server.run(streams[0], streams[1], server.create_initialization_options())
        # NOTE(review): placed after the `async with` so the endpoint returns
        # a response once the session ends; the original nesting was lost
        # with the indentation -- confirm against the upstream script.
        return Response()

    app = Starlette(
        routes=[
            Route("/sse", endpoint=handle_sse),
            Mount("/messages/", app=sse.handle_post_message),
        ]
    )
    uvicorn.Server(uvicorn.Config(app=app, host="127.0.0.1", port=port, log_level="error")).run()
def get_free_port() -> int:
    """Return a TCP port on 127.0.0.1 that was free at the time of the call.

    Binding to port 0 lets the operating system choose an unused port. The
    probing socket is closed before returning, so the port is only *likely*
    to still be free when the caller binds it: another process could claim
    it in between.
    """
    with socket.socket() as s:
        s.bind(("127.0.0.1", 0))
        return s.getsockname()[1]
def wait_for_server(port: int, timeout: float = 5.0) -> None:
    """Block until something accepts TCP connections on ``127.0.0.1:<port>``.

    Repeatedly attempts a short-lived connection, pausing 10 ms between
    failed attempts.

    Args:
        port: Loopback TCP port to probe.
        timeout: Maximum number of seconds to keep probing. A value of zero
            or less raises immediately without probing.

    Raises:
        TimeoutError: If no connection succeeded within ``timeout`` seconds.
    """
    # Use the monotonic clock so system clock adjustments can neither
    # shorten nor extend the wait.
    start = time.monotonic()
    while time.monotonic() - start < timeout:
        try:
            with socket.socket() as s:
                s.settimeout(0.1)
                s.connect(("127.0.0.1", port))
                return
        except OSError:
            # Covers ConnectionRefusedError and connect timeouts (both are
            # OSError subclasses): the server is not up yet, so retry.
            time.sleep(0.01)
    raise TimeoutError(f"Server did not start within {timeout}s")
# === Test 1: Server dies mid-request ===
async def test_server_dies() -> str:
    """Kill server while request is in flight. What exception do we get?

    Starts the server in a child process, opens an SSE client session,
    launches a ``slow_tool`` call as a task, kills the server process 0.3 s
    later, and then awaits the task for up to 5 s.

    Returns:
        ``"CancelledError"`` if ``asyncio.CancelledError`` was raised, the
        class name of any other ``Exception`` that was raised, or ``"None"``
        if nothing was raised.
    """
    port = get_free_port()
    proc = multiprocessing.Process(target=run_server, kwargs={"port": port}, daemon=True)
    proc.start()
    wait_for_server(port)
    exception_type = None
    try:
        async with sse_client(f"http://127.0.0.1:{port}/sse") as (r, w):
            async with ClientSession(r, w) as session:
                await session.initialize()
                task = asyncio.create_task(session.call_tool("slow_tool", {}))
                # Give the request time to reach the server before killing it.
                await asyncio.sleep(0.3)
                # Kill server while request is pending
                proc.kill()
                proc.join(timeout=1)
                await asyncio.wait_for(task, timeout=5.0)
    except asyncio.CancelledError:
        # CancelledError derives from BaseException, so the generic handler
        # below would not catch it.
        exception_type = "CancelledError"
    except Exception as ex:
        exception_type = type(ex).__name__
    finally:
        if proc.is_alive():
            proc.kill()
            # Reap the child so it does not linger as a zombie process.
            proc.join(timeout=1)
    return exception_type or "None"
# === Test 2: External cancellation ===
async def test_external_cancel() -> str:
    """Cancel task externally (simulating Ctrl+C). What exception do we get?

    Starts the server in a child process, opens an SSE client session,
    launches a ``slow_tool`` call as a task, calls ``task.cancel()`` 0.3 s
    later while the server is still running, and then awaits the task.

    Returns:
        ``"CancelledError"`` if ``asyncio.CancelledError`` was raised, the
        class name of any other ``Exception`` that was raised, or ``"None"``
        if nothing was raised.
    """
    port = get_free_port()
    proc = multiprocessing.Process(target=run_server, kwargs={"port": port}, daemon=True)
    proc.start()
    wait_for_server(port)
    exception_type = None
    try:
        async with sse_client(f"http://127.0.0.1:{port}/sse") as (r, w):
            async with ClientSession(r, w) as session:
                await session.initialize()
                task = asyncio.create_task(session.call_tool("slow_tool", {}))
                # Give the request time to reach the server before cancelling.
                await asyncio.sleep(0.3)
                # External cancellation
                task.cancel()
                await task
    except asyncio.CancelledError:
        # CancelledError derives from BaseException, so the generic handler
        # below would not catch it.
        exception_type = "CancelledError"
    except Exception as ex:
        exception_type = type(ex).__name__
    finally:
        if proc.is_alive():
            proc.kill()
            # Reap the child so it does not linger as a zombie process.
            proc.join(timeout=1)
    return exception_type or "None"
# === Main ===
# Entry point: run both scenarios in separate event loops and compare the
# exception names they report. The guard also keeps this driver from running
# when the module is imported rather than executed as a script.
if __name__ == "__main__":
    print("Test 1: Server dies mid-request")
    result1 = asyncio.run(test_server_dies())
    print(f" Exception: {result1}")
    print()
    print("Test 2: External cancellation (Ctrl+C simulation)")
    result2 = asyncio.run(test_external_cancel())
    print(f" Exception: {result2}")
    print()
    print("Result:")
    if result1 == result2 == "CancelledError":
        # The ambiguity the issue reports: both paths look the same.
        print(" Both scenarios raise CancelledError.")
        print(" Client code cannot distinguish server failure from shutdown request.")
    else:
        print(f" Test 1: {result1}")
        print(f" Test 2: {result2}")
Python & MCP Python SDK
Python: 3.12.12
MCP SDK: 1.20.0