Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ dependencies = [
"asyncer>=0.0.8",
"fastmcp==2.10.5",
"jmespath~=1.0.1",
"loguru",
"pydantic>=2.0,<2.12",
]
license = "Apache-2.0"

Expand Down
9 changes: 7 additions & 2 deletions scripts/test_main_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,12 @@ async def main(urn_or_query: Optional[str]) -> None:
print(
json.dumps(
await _call_tool(
mcp_client, "get_lineage", urn=urn, upstream=False, max_hops=3
mcp_client,
"get_lineage",
urn=urn,
column=None,
upstream=False,
max_hops=3,
),
indent=2,
)
Expand All @@ -85,7 +90,7 @@ async def main(urn_or_query: Optional[str]) -> None:
print(f"Getting queries: {urn}")
print(
json.dumps(
await _call_tool(mcp_client, "get_dataset_queries", dataset_urn=urn),
await _call_tool(mcp_client, "get_dataset_queries", urn=urn),
indent=2,
)
)
Expand Down
11 changes: 8 additions & 3 deletions src/mcp_server_datahub/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@
type=click.Choice(["stdio", "sse", "http"]),
default="stdio",
)
@click.option(
"--host",
type=click.STRING,
default="127.0.0.1",
)
@click.option(
"--debug",
is_flag=True,
Expand All @@ -29,7 +34,7 @@
@telemetry.with_telemetry(
capture_kwargs=["transport"],
)
def main(transport: Literal["stdio", "sse", "http"], debug: bool) -> None:
def main(transport: Literal["stdio", "sse", "http"], host:str, debug: bool) -> None:
client = DataHubClient.from_env(
client_mode=ClientMode.SDK,
datahub_component=f"mcp-server-datahub/{__version__}",
Expand All @@ -42,9 +47,9 @@ def main(transport: Literal["stdio", "sse", "http"], debug: bool) -> None:

with with_datahub_client(client):
if transport == "http":
mcp.run(transport=transport, show_banner=False, stateless_http=True)
mcp.run(transport=transport, show_banner=False, stateless_http=True, host=host)
else:
mcp.run(transport=transport, show_banner=False)
mcp.run(transport=transport, show_banner=False, host=host)


if __name__ == "__main__":
Expand Down
10 changes: 4 additions & 6 deletions src/mcp_server_datahub/_telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,10 @@ async def on_call_tool(
# This method typically returns fastmcp.tools.tool.ToolResult.
if isinstance(result, mt.CallToolResult):
telemetry_data["tool_result_is_error"] = result.isError
telemetry_data["tool_result_length"] = (
sum(
len(block.text)
for block in result.content
if isinstance(block, mt.TextContent)
),
telemetry_data["tool_result_length"] = sum(
len(block.text)
for block in result.content
if isinstance(block, mt.TextContent)
)

return result
Expand Down
10 changes: 10 additions & 0 deletions src/mcp_server_datahub/gql/entity_details.gql
Original file line number Diff line number Diff line change
Expand Up @@ -1240,9 +1240,19 @@ fragment datasetSchema on Dataset {
}
}

# Selects the materialized, logic and language fields of a dataset's
# viewProperties.
fragment viewProperties on Dataset {
  viewProperties {
    materialized
    logic
    # formattedLogic is currently not selected (left commented out).
    language
  }
}

fragment entityDetails on Entity {
... on Dataset {
...datasetSchema
...viewProperties
}
... on StructuredPropertyEntity {
...structuredPropertyFields
Expand Down
26 changes: 26 additions & 0 deletions src/mcp_server_datahub/gql/search.gql
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,26 @@ fragment SearchEntityInfo on Entity {
name
}
}
... on GlossaryTerm {
properties {
name
}
}
... on GlossaryNode {
properties {
name
}
}
... on Domain {
properties {
name
}
}
... on DataProduct {
properties {
name
}
}
}

fragment FacetEntityInfo on Entity {
Expand Down Expand Up @@ -70,6 +90,8 @@ query search(
entity {
...SearchEntityInfo
}
# TODO: Consider adding these fields for enhanced search experience:
# score # BM25 score for keyword search relevance
}
facets {
field
Expand All @@ -83,5 +105,9 @@ query search(
}
}
}
# TODO: Consider adding metadata section for search algorithm transparency:
# metadata {
# scoringMethod # e.g., "BM25" vs "COSINE_SIMILARITY"
# }
}
}
111 changes: 111 additions & 0 deletions src/mcp_server_datahub/gql/semantic_search.gql
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# Identity of an entity returned by a search: always the urn, plus
# properties.name for the entity types listed below.
# NOTE(review): search.gql declares a fragment with this same name; confirm
# the two files are never combined into a single GraphQL document.
fragment SearchEntityInfo on Entity {
  urn

  # Urns of some entity types are not human-readable, so the name is
  # selected alongside the urn for those types.
  ... on Dataset { properties { name } }
  ... on Chart { properties { name } }
  ... on Dashboard { properties { name } }
  ... on Container { properties { name } }
  ... on GlossaryTerm { properties { name } }
  ... on GlossaryNode { properties { name } }
  ... on Domain { properties { name } }
  ... on DataProduct { properties { name } }
}

# Display fields for the entity attached to a facet aggregation bucket.
# Only Dataset, Container and GlossaryTerm entities contribute fields.
# NOTE(review): search.gql declares a fragment with this same name; confirm
# the two files are never combined into a single GraphQL document.
fragment FacetEntityInfo on Entity {
  ... on Dataset {
    name
    properties { name }
  }
  ... on Container {
    subTypes { typeNames }
    properties { name }
  }
  ... on GlossaryTerm {
    properties { name }
  }
}

# Calls semanticSearchAcrossEntities with the given query text, optional
# entity types, optional filters and a result count. Returns the count and
# total, the urn/name of each matching entity, and the facet aggregations.
query semanticSearch(
  $types: [EntityType!]
  $query: String!
  $orFilters: [AndFilterInput!]
  $count: Int!
) {
  semanticSearchAcrossEntities(
    input: {
      query: $query
      count: $count
      types: $types
      orFilters: $orFilters
      searchFlags: { skipHighlighting: true, maxAggValues: 5 }
    }
  ) {
    count
    total

    searchResults {
      entity { ...SearchEntityInfo }
      # TODO: Consider also selecting, for a richer semantic search experience:
      # score  # Cosine similarity score (0-1) for semantic relevance
    }

    facets {
      field
      displayName
      aggregations {
        value
        count
        displayName
        entity { ...FacetEntityInfo }
      }
    }

    # TODO: Consider a metadata section for search algorithm transparency:
    # metadata {
    #   scoringMethod  # e.g., "COSINE_SIMILARITY" vs "BM25"
    # }
  }
}
Loading