-
Notifications
You must be signed in to change notification settings - Fork 64
Refactor ClickHouse MCP tools with improved documentation and functionality #18
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
1ea2321
0ab7d1f
62529e3
c135d09
f3025c7
0430450
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,16 +28,23 @@ | |
mcp = FastMCP(MCP_SERVER_NAME, dependencies=deps) | ||
|
||
|
||
@mcp.tool() | ||
@mcp.tool( | ||
description="Lists all available databases in the ClickHouse server. Use this tool to get a complete list of databases before exploring their tables. No parameters required." | ||
) | ||
def list_databases(): | ||
logger.info("Listing all databases") | ||
client = create_clickhouse_client() | ||
result = client.command("SHOW DATABASES") | ||
logger.info(f"Found {len(result) if isinstance(result, list) else 1} databases") | ||
logger.info( | ||
f"Found {len(result) if isinstance(result, list) else 1} databases") | ||
return result | ||
|
||
|
||
@mcp.tool() | ||
@mcp.tool( | ||
description="Lists tables in a ClickHouse database with detailed schema information. " | ||
"Provides complete table structure including columns, types, and creation statements. " | ||
"Use the 'like' parameter to filter results with SQL LIKE pattern." | ||
) | ||
def list_tables(database: str, like: str = None): | ||
logger.info(f"Listing tables in database '{database}'") | ||
client = create_clickhouse_client() | ||
|
@@ -49,7 +56,8 @@ def list_tables(database: str, like: str = None): | |
# Get all table comments in one query | ||
table_comments_query = f"SELECT name, comment FROM system.tables WHERE database = {format_query_value(database)}" | ||
table_comments_result = client.query(table_comments_query) | ||
table_comments = {row[0]: row[1] for row in table_comments_result.result_rows} | ||
table_comments = {row[0]: row[1] | ||
for row in table_comments_result.result_rows} | ||
|
||
# Get all column comments in one query | ||
column_comments_query = f"SELECT table, name, comment FROM system.columns WHERE database = {format_query_value(database)}" | ||
|
@@ -105,7 +113,12 @@ def get_table_info(table): | |
return tables | ||
|
||
|
||
@mcp.tool() | ||
@mcp.tool( | ||
description="Executes a SELECT query against the ClickHouse database. " | ||
"Use for custom data retrieval with your own SQL. " | ||
"Queries are executed in read-only mode for safety. " | ||
"Format your query without specifying database names in SQL." | ||
) | ||
def run_select_query(query: str): | ||
logger.info(f"Executing SELECT query: {query}") | ||
client = create_clickhouse_client() | ||
|
@@ -125,6 +138,74 @@ def run_select_query(query: str): | |
return f"error running query: {err}" | ||
|
||
|
||
@mcp.tool( | ||
description="Retrieves a random sample of rows from a table using ORDER BY RAND(). " | ||
"Perfect for data exploration and quick analysis. " | ||
"Limit parameter capped at 10 rows. " | ||
"Use the where parameter for filtering specific data patterns." | ||
) | ||
def get_table_sample(database: str, table: str, columns: str = "*", limit: int = 5, where: str = None): | ||
"""Retrieves a random sample of rows from a table with ORDER BY RAND() | ||
|
||
Args: | ||
database: The database containing the table | ||
table: The table to sample data from | ||
columns: Comma-separated list of columns to retrieve (default: "*" for all columns) | ||
limit: Maximum number of rows to return (default: 5, max: 10) | ||
where: Optional WHERE clause to filter the data | ||
|
||
Returns: | ||
List of dictionaries, each representing a random row from the table | ||
|
||
Raises: | ||
ValueError: If limit is > 10 or < 1 | ||
ConnectionError: If there's an issue connecting to ClickHouse | ||
ClickHouseError: If there's an error executing the query | ||
""" | ||
# Validate limit | ||
if limit > 10: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why cap at 10? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since this is an MCP and the package is meant to work with AI agents, I had to set this limit to avoid overflowing the language model’s context window. I deal with huge tables, and the AI kept trying to grab a way too large sample. Do you think I should increase the cap? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 10 seems low, have you checked how other MCP servers handle this? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd increase this cap! I've had a bunch of times where I want to sample a couple of tables with sparse matches between them, and with the limit of 10, it can't find matches. |
||
logger.warning( | ||
f"Requested limit {limit} exceeds maximum of 10, using 10 instead") | ||
limit = 10 | ||
elif limit < 1: | ||
logger.warning( | ||
f"Requested limit {limit} is less than 1, using 1 instead") | ||
limit = 1 | ||
|
||
logger.info(f"Sampling {limit} random rows from {database}.{table}") | ||
client = create_clickhouse_client() | ||
|
||
try: | ||
# Build the query | ||
query = f"SELECT {columns} FROM {quote_identifier(database)}.{quote_identifier(table)}" | ||
|
||
# Add WHERE clause if provided | ||
if where: | ||
query += f" WHERE {where}" | ||
|
||
# Add random ordering and limit | ||
query += f" ORDER BY rand() LIMIT {limit}" | ||
|
||
logger.info(f"Executing sampling query: {query}") | ||
|
||
# Execute query with readonly setting for safety | ||
res = client.query(query, settings={"readonly": 1}) | ||
column_names = res.column_names | ||
rows = [] | ||
|
||
for row in res.result_rows: | ||
row_dict = {} | ||
for i, col_name in enumerate(column_names): | ||
row_dict[col_name] = row[i] | ||
rows.append(row_dict) | ||
|
||
logger.info(f"Sample query returned {len(rows)} rows") | ||
return rows | ||
except Exception as err: | ||
logger.error(f"Error executing sample query: {err}") | ||
return f"error running sample query: {err}" | ||
|
||
|
||
def create_clickhouse_client(): | ||
client_config = config.get_client_config() | ||
logger.info( | ||
|
@@ -139,7 +220,8 @@ def create_clickhouse_client(): | |
client = clickhouse_connect.get_client(**client_config) | ||
# Test the connection | ||
version = client.server_version | ||
logger.info(f"Successfully connected to ClickHouse server version {version}") | ||
logger.info( | ||
f"Successfully connected to ClickHouse server version {version}") | ||
return client | ||
except Exception as e: | ||
logger.error(f"Failed to connect to ClickHouse: {str(e)}") | ||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This ends up calling `putenv` underneath. Ideally we wouldn't mutate process environment variables. Especially since this changes `getenv` to always be used with a default, it seems unnecessary.