Skip to content

Commit fa24775

Browse files
committed
feat: add schema tools
Add 2 mcp tools for querying schema versions and blame. - get_schema_versions() - get_schema_blame()
1 parent b573ff4 commit fa24775

File tree

4 files changed

+102
-0
lines changed

4 files changed

+102
-0
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ Supports both DataHub Core and DataHub Cloud.
1111
- Fetching metadata for any entity
1212
- Traversing the lineage graph, both upstream and downstream
1313
- Listing SQL queries associated with a dataset
14+
- Querying schema versions and schema blame.
1415

1516
## Demo
1617

src/mcp_server_datahub/gql/entity_details.gql

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1205,3 +1205,45 @@ query GetEntityLineage($input: SearchAcrossLineageInput!) {
12051205
}
12061206
}
12071207
}
1208+
1209+
query getSchemaVersionList($input: GetSchemaVersionListInput!) {
1210+
getSchemaVersionList(input: $input) {
1211+
latestVersion {
1212+
semanticVersion
1213+
semanticVersionTimestamp
1214+
versionStamp
1215+
__typename
1216+
}
1217+
semanticVersionList {
1218+
semanticVersion
1219+
semanticVersionTimestamp
1220+
versionStamp
1221+
__typename
1222+
}
1223+
__typename
1224+
}
1225+
}
1226+
1227+
1228+
query getSchemaBlame($input: GetSchemaBlameInput!) {
1229+
getSchemaBlame(input: $input) {
1230+
version {
1231+
semanticVersion
1232+
semanticVersionTimestamp
1233+
versionStamp
1234+
__typename
1235+
}
1236+
schemaFieldBlameList {
1237+
fieldPath
1238+
schemaFieldChange {
1239+
timestampMillis
1240+
lastSemanticVersion
1241+
lastSchemaFieldChange
1242+
versionStamp
1243+
__typename
1244+
}
1245+
__typename
1246+
}
1247+
__typename
1248+
}
1249+
}

src/mcp_server_datahub/mcp_server.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,47 @@ def get_lineage(urn: str, upstream: bool, max_hops: int = 1) -> dict:
313313
return lineage
314314

315315

316+
@mcp.tool(description="Retrieve schema versions for a given dataset URN.")
317+
def get_schema_versions(dataset_urn: str) -> List[Dict[str, Any]]:
318+
client = get_client()
319+
320+
variables = {
321+
"input": {
322+
"datasetUrn": dataset_urn,
323+
}
324+
}
325+
326+
response = _execute_graphql(
327+
client._graph,
328+
query=entity_details_fragment_gql,
329+
variables=variables,
330+
operation_name="getSchemaVersionList",
331+
)
332+
333+
return response.get("getSchemaVersionList", [])
334+
335+
336+
@mcp.tool(description="Use this tool to get a schema blame.")
337+
def get_schema_blame(datasetUrn: str, version: str) -> Dict[str, Any]:
338+
client = get_client()
339+
340+
variables = {
341+
"input": {
342+
"datasetUrn": datasetUrn,
343+
"version": version,
344+
}
345+
}
346+
347+
resp = _execute_graphql(
348+
client._graph,
349+
query=entity_details_fragment_gql,
350+
variables=variables,
351+
operation_name="getSchemaBlame",
352+
)
353+
354+
return resp.get("getSchemaBlame", {})
355+
356+
316357
if __name__ == "__main__":
317358
import sys
318359

@@ -348,3 +389,9 @@ def _divider() -> None:
348389
_divider()
349390
print("Getting queries", urn)
350391
print(json.dumps(get_dataset_queries(urn), indent=2))
392+
_divider()
393+
print("Getting schema versions", urn)
394+
print(json.dumps(get_schema_versions(urn), indent=2))
395+
_divider()
396+
print("Getting schema blame", urn)
397+
print(json.dumps(get_schema_blame(urn, "latest"), indent=2))

tests/test_mcp_server.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
get_dataset_queries,
1111
get_entity,
1212
get_lineage,
13+
get_schema_blame,
14+
get_schema_versions,
1315
search,
1416
with_client,
1517
)
@@ -58,6 +60,16 @@ def test_search() -> None:
5860
assert res is not None
5961

6062

63+
def test_get_schema_version() -> None:
64+
res = get_schema_versions(_test_urn)
65+
assert res is not None
66+
67+
68+
def test_get_schema_blame() -> None:
69+
res = get_schema_blame(_test_urn)
70+
assert res is not None
71+
72+
6173
if __name__ == "__main__":
6274
import pytest
6375

0 commit comments

Comments
 (0)