Skip to content

Commit aba06bc

Browse files
authored
feat: better error context (#38)
### what Makes some slightly-unclear error-ish conditions a little less unclear. ### why These are the cases that have been commented on as hard to follow/interpret. ### testing Apart from the trivial unit tests, it – seems to help? ### docs Even this kinda is docs more than anything.
1 parent ba02ba9 commit aba06bc

File tree

4 files changed

+104
-16
lines changed

4 files changed

+104
-16
lines changed

stacklet/mcp/assetdb/redash.py

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from ..lifespan import server_cached
2121
from ..settings import SETTINGS
2222
from ..stacklet_auth import StackletCredentials
23+
from ..utils.error import AnnotatedError
2324
from .models import ExportFormat, Job, Query, QueryListResponse, QueryResult, QueryUpsert
2425

2526

@@ -92,8 +93,18 @@ async def list_queries(
9293
if tags:
9394
params["tags"] = tags
9495

95-
result = await self._make_request("GET", "api/queries", params=params)
96-
return QueryListResponse(**result)
96+
try:
97+
result = await self._make_request("GET", "api/queries", params=params)
98+
return QueryListResponse(**result)
99+
except httpx.HTTPStatusError as err:
100+
if err.response.status_code == 400:
101+
raise AnnotatedError(
102+
problem="Backend rejected request",
103+
likely_cause="the page parameter was out of bounds",
104+
next_steps="check page 1, or try a simpler search",
105+
original_error=str(err),
106+
)
107+
raise
97108

98109
async def get_query(self, query_id: int) -> Query:
99110
"""
@@ -196,11 +207,19 @@ async def _poll_job(self, job: Job, timeout: int) -> int:
196207
if job.query_result_id:
197208
return job.query_result_id
198209
elif job.status.is_terminal:
199-
raise RuntimeError(f"Query execution failed: {job.error or 'Unknown error.'}")
210+
raise AnnotatedError(
211+
problem=f"Query execution error: {job.error or '(unknown)'}",
212+
likely_cause="the query SQL or parameters were invalid",
213+
next_steps="investigate the errors, or try a simpler query and build up",
214+
)
200215

201216
remaining_s = cutoff - time.monotonic()
202217
if remaining_s <= 0:
203-
raise RuntimeError(f"Query execution timed out after {timeout} seconds")
218+
raise AnnotatedError(
219+
problem=f"Timed out after {timeout} seconds",
220+
likely_cause="the query is still executing",
221+
next_steps="request cached results (with max_age=-1), or try a simpler query",
222+
)
204223
await asyncio.sleep(min(interval_s, remaining_s))
205224
interval_s *= 2
206225

stacklet/mcp/platform/graphql.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from ..lifespan import server_cached
2929
from ..settings import SETTINGS
3030
from ..stacklet_auth import StackletCredentials
31+
from ..utils.error import AnnotatedError
3132
from .models import (
3233
ConnectionExport,
3334
ExportRequest,
@@ -80,7 +81,11 @@ async def query(self, query: str, variables: dict[str, Any]) -> GraphQLQueryResu
8081
Structured GraphQL query result
8182
"""
8283
if not self.enable_mutations and has_mutations(query):
83-
raise Exception("Mutations not allowed in the client")
84+
raise AnnotatedError(
85+
problem="Mutations disabled",
86+
likely_cause="the user doesn't want you to run mutations",
87+
next_steps="tell the user to set 'STACKLET_MCP_PLATFORM_ALLOW_MUTATIONS'",
88+
)
8489

8590
return await self._query(query, variables)
8691

@@ -166,7 +171,11 @@ async def start_export(self, spec: ExportRequest) -> str:
166171
"""
167172
result = await self._query(self.Q_START_EXPORT, {"input": spec.for_graphql()})
168173
if result.errors:
169-
raise RuntimeError(f"Export mutation failed: {result.errors}")
174+
raise AnnotatedError(
175+
problem=f"Export mutation failed: {result.errors}",
176+
likely_cause="what it says",
177+
next_steps="check data types with 'platform_get_types'",
178+
)
170179

171180
# If no errors, data is at least guaranteed truthy.
172181
export = cast(dict[str, Any], result.data)["exportConnection"]["export"]

stacklet/mcp/utils/error.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# LICENSE HEADER MANAGED BY add-license-header
2+
#
3+
# Copyright (c) 2025 Stacklet, Inc.
4+
#
5+
6+
"""
7+
Error handling utilities for creating annotated ToolErrors with user guidance.
8+
"""
9+
10+
from fastmcp.exceptions import ToolError
11+
12+
13+
class AnnotatedError(ToolError):
14+
"""
15+
A well-annotated ToolError with context and guidance.
16+
17+
Args:
18+
problem: Clear description of what went wrong
19+
likely_cause: Most probable reason for the failure
20+
next_steps: Actionable advice for resolving the issue
21+
original_error: Optional underlying error details
22+
"""
23+
24+
def __init__(
25+
self,
26+
problem: str,
27+
likely_cause: str,
28+
next_steps: str,
29+
original_error: str | None = None,
30+
):
31+
message = f"{problem}. This likely means {likely_cause}. Next steps: {next_steps}"
32+
if original_error:
33+
message += f". Original error: {original_error}"
34+
super().__init__(message)

tests/test_tools_assetdb.py

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -147,8 +147,10 @@ async def test_page_missing(self):
147147
):
148148
result = await self.assert_call({"page": 999}, error=True)
149149

150-
# XXX better errors might be nice, "page 999 out of range" is… likely?
151-
assert result.text == "Error calling tool 'assetdb_query_list': mocked http 400"
150+
assert result.text == (
151+
"Backend rejected request. This likely means the page parameter was out of bounds. "
152+
"Next steps: check page 1, or try a simpler search. Original error: mocked http 400"
153+
)
152154

153155
@json_guard_parametrize([5, 10])
154156
async def test_page_size(self, mangle, value):
@@ -220,7 +222,9 @@ async def test_not_found(self):
220222
):
221223
result = await self.assert_call({"query_id": 999}, error=True)
222224

223-
# XXX better errors might be nice, "query 999 does not exist"
225+
# Generally, this is enough context for the LLM to handle it fine.
226+
# Annotated errors come into their own when the meaning of a raw
227+
# error is not immediately obvious.
224228
assert result.text == "Error calling tool 'assetdb_query_get': mocked http 404"
225229

226230

@@ -437,7 +441,7 @@ async def assert_tool_call(self, params, *expect_http, expect_error=None):
437441
result = await self.assert_call(params, error=bool(expect_error))
438442

439443
if expect_error:
440-
assert result.text == f"Error calling tool '{self.tool_name}': " + expect_error
444+
assert result.text == expect_error
441445
else:
442446
self.assert_tool_query_result(result)
443447

@@ -549,7 +553,10 @@ async def test_job_timeout(self, mangle, value, async_sleeps):
549553
self.expect_get_job(self.job_response(JobStatus.STARTED)),
550554
self.expect_get_job(self.job_response(JobStatus.STARTED)),
551555
self.expect_get_job(self.job_response(JobStatus.STARTED)),
552-
expect_error="Query execution timed out after 60 seconds",
556+
expect_error=(
557+
"Timed out after 60 seconds. This likely means the query is still executing. "
558+
"Next steps: request cached results (with max_age=-1), or try a simpler query"
559+
),
553560
)
554561
assert async_sleeps == [2, 4, 8, 16, 30]
555562

@@ -559,7 +566,11 @@ async def test_job_failure(self):
559566
{"query_id": self.QUERY_ID},
560567
self.expect_post(self.post_data(), self.job_response(JobStatus.QUEUED)),
561568
self.expect_get_job(self.job_response(JobStatus.FAILED)),
562-
expect_error="Query execution failed: Oh no borken",
569+
expect_error=(
570+
"Query execution error: Oh no borken. This likely means the query SQL or "
571+
"parameters were invalid. Next steps: investigate the errors, or try a simpler "
572+
"query and build up"
573+
),
563574
)
564575

565576
async def test_job_cancellation(self):
@@ -570,7 +581,11 @@ async def test_job_cancellation(self):
570581
{"query_id": self.QUERY_ID},
571582
self.expect_post(self.post_data(), self.job_response(JobStatus.QUEUED)),
572583
self.expect_get_job(self.job_response(JobStatus.CANCELED)),
573-
expect_error="Query execution failed: Unknown error.",
584+
expect_error=(
585+
"Query execution error: (unknown). This likely means the query SQL or "
586+
"parameters were invalid. Next steps: investigate the errors, or try a simpler "
587+
"query and build up"
588+
),
574589
)
575590

576591

@@ -641,7 +656,10 @@ async def test_job_timeout(self, mangle, value, async_sleeps):
641656
self.expect_get_job(self.job_response(JobStatus.STARTED)),
642657
self.expect_get_job(self.job_response(JobStatus.STARTED)),
643658
self.expect_get_job(self.job_response(JobStatus.STARTED)),
644-
expect_error="Query execution timed out after 60 seconds",
659+
expect_error=(
660+
"Timed out after 60 seconds. This likely means the query is still executing. "
661+
"Next steps: request cached results (with max_age=-1), or try a simpler query"
662+
),
645663
)
646664
assert async_sleeps == [2, 4, 8, 16, 30]
647665

@@ -651,7 +669,11 @@ async def test_job_failure(self):
651669
{"query": "SELECT 1"},
652670
self.expect_post(self.post_data(), self.job_response(JobStatus.QUEUED)),
653671
self.expect_get_job(self.job_response(JobStatus.FAILED)),
654-
expect_error="Query execution failed: Oh no borken",
672+
expect_error=(
673+
"Query execution error: Oh no borken. This likely means the query SQL or "
674+
"parameters were invalid. Next steps: investigate the errors, or try a simpler "
675+
"query and build up"
676+
),
655677
)
656678

657679
async def test_job_cancellation(self):
@@ -662,7 +684,11 @@ async def test_job_cancellation(self):
662684
{"query": "SELECT 1"},
663685
self.expect_post(self.post_data(), self.job_response(JobStatus.QUEUED)),
664686
self.expect_get_job(self.job_response(JobStatus.CANCELED)),
665-
expect_error="Query execution failed: Unknown error.",
687+
expect_error=(
688+
"Query execution error: (unknown). This likely means the query SQL or "
689+
"parameters were invalid. Next steps: investigate the errors, or try a simpler "
690+
"query and build up"
691+
),
666692
)
667693

668694

0 commit comments

Comments
 (0)