J0hnG4lt · J0hnG4lt · Jul 29, 2025 · Aug 1, 2025 · Aug 4, 2025 · Aug 4, 2025
diff --git a/pyproject.toml b/pyproject.toml
@@ -9,6 +9,8 @@ dependencies = [
     "asyncer>=0.0.8",
     "fastmcp==2.10.5",
     "jmespath~=1.0.1",
+    "loguru",
+    "pydantic>=2.0,<2.12",
 ]
 license = "Apache-2.0"
 

diff --git a/scripts/test_main_tools.py b/scripts/test_main_tools.py
@@ -76,7 +76,12 @@ async def main(urn_or_query: Optional[str]) -> None:
         print(
             json.dumps(
                 await _call_tool(
-                    mcp_client, "get_lineage", urn=urn, upstream=False, max_hops=3
+                    mcp_client,
+                    "get_lineage",
+                    urn=urn,
+                    column=None,
+                    upstream=False,
+                    max_hops=3,
                 ),
                 indent=2,
             )
@@ -85,7 +90,7 @@ async def main(urn_or_query: Optional[str]) -> None:
         print(f"Getting queries: {urn}")
         print(
             json.dumps(
-                await _call_tool(mcp_client, "get_dataset_queries", dataset_urn=urn),
+                await _call_tool(mcp_client, "get_dataset_queries", urn=urn),
                 indent=2,
             )
         )

diff --git a/src/mcp_server_datahub/__main__.py b/src/mcp_server_datahub/__main__.py
@@ -21,6 +21,11 @@
     type=click.Choice(["stdio", "sse", "http"]),
     default="stdio",
 )
+@click.option(
+    "--host",
+    type=click.STRING,
+    default="127.0.0.1",
+)
 @click.option(
     "--debug",
     is_flag=True,
@@ -29,7 +34,12 @@
 @telemetry.with_telemetry(
     capture_kwargs=["transport"],
 )
-def main(transport: Literal["stdio", "sse", "http"], debug: bool) -> None:
+def main(transport: Literal["stdio", "sse", "http"], host: str, debug: bool) -> None:
+    """Start the DataHub MCP server on the selected transport.
+
+    The --host value is forwarded to the network transports ("http" and
+    "sse") only; it has no effect for "stdio".
+    """
     client = DataHubClient.from_env(
         client_mode=ClientMode.SDK,
         datahub_component=f"mcp-server-datahub/{__version__}",
@@ -42,9 +52,13 @@ def main(transport: Literal["stdio", "sse", "http"], debug: bool) -> None:
 
     with with_datahub_client(client):
         if transport == "http":
-            mcp.run(transport=transport, show_banner=False, stateless_http=True)
+            mcp.run(transport=transport, show_banner=False, stateless_http=True, host=host)
+        elif transport == "sse":
+            mcp.run(transport=transport, show_banner=False, host=host)
         else:
+            # stdio has no network listener to bind, so --host is not
+            # forwarded here; only the HTTP-based transports receive it.
             mcp.run(transport=transport, show_banner=False)
 
 
 if __name__ == "__main__":

diff --git a/src/mcp_server_datahub/_telemetry.py b/src/mcp_server_datahub/_telemetry.py
@@ -35,12 +35,10 @@ async def on_call_tool(
                 # This method typically returns fastmcp.tools.tool.ToolResult.
                 if isinstance(result, mt.CallToolResult):
                     telemetry_data["tool_result_is_error"] = result.isError
-                telemetry_data["tool_result_length"] = (
-                    sum(
-                        len(block.text)
-                        for block in result.content
-                        if isinstance(block, mt.TextContent)
-                    ),
+                telemetry_data["tool_result_length"] = sum(
+                    len(block.text)
+                    for block in result.content
+                    if isinstance(block, mt.TextContent)
                 )
 
                 return result

diff --git a/src/mcp_server_datahub/gql/entity_details.gql b/src/mcp_server_datahub/gql/entity_details.gql
@@ -1240,9 +1240,20 @@ fragment datasetSchema on Dataset {
   }
 }
 
+# View definition of a dataset: materialization flag, logic text, and its language.
+fragment viewProperties on Dataset {
+  viewProperties {
+    materialized
+    logic
+    # formattedLogic
+    language
+  }
+}
+
 fragment entityDetails on Entity {
   ... on Dataset {
     ...datasetSchema
+    ...viewProperties
   }
   ... on StructuredPropertyEntity {
     ...structuredPropertyFields

diff --git a/src/mcp_server_datahub/gql/search.gql b/src/mcp_server_datahub/gql/search.gql
@@ -23,6 +23,26 @@ fragment SearchEntityInfo on Entity {
       name
     }
   }
+  ... on GlossaryTerm {
+    properties {
+      name
+    }
+  }
+  ... on GlossaryNode {
+    properties {
+      name
+    }
+  }
+  ... on Domain {
+    properties {
+      name
+    }
+  }
+  ... on DataProduct {
+    properties {
+      name
+    }
+  }
 }
 
 fragment FacetEntityInfo on Entity {
@@ -70,6 +90,8 @@ query search(
       entity {
         ...SearchEntityInfo
       }
+      # TODO: Consider adding this field for an enhanced search experience:
+      # score               # BM25 score for keyword search relevance
     }
     facets {
       field
@@ -83,5 +105,9 @@ query search(
         }
       }
     }
+    # TODO: Consider adding metadata section for search algorithm transparency:
+    # metadata {
+    #   scoringMethod       # e.g., "BM25" vs "COSINE_SIMILARITY"
+    # }
   }
 }
diff --git a/src/mcp_server_datahub/gql/semantic_search.gql b/src/mcp_server_datahub/gql/semantic_search.gql
@@ -0,0 +1,111 @@
+fragment SearchEntityInfo on Entity {
+  urn
+
+  # For some entity types, the urns are not human-readable. For those,
+  # we pull the name as well.
+  ... on Dataset {
+    properties {
+      name
+    }
+  }
+  ... on Chart {
+    properties {
+      name
+    }
+  }
+  ... on Dashboard {
+    properties {
+      name
+    }
+  }
+  ... on Container {
+    properties {
+      name
+    }
+  }
+  ... on GlossaryTerm {
+    properties {
+      name
+    }
+  }
+  ... on GlossaryNode {
+    properties {
+      name
+    }
+  }
+  ... on Domain {
+    properties {
+      name
+    }
+  }
+  ... on DataProduct {
+    properties {
+      name
+    }
+  }
+}
+
+fragment FacetEntityInfo on Entity {
+  ... on Dataset {
+    name
+    properties {
+      name
+    }
+  }
+  ... on Container {
+    subTypes {
+      typeNames
+    }
+    properties {
+      name
+    }
+  }
+  ... on GlossaryTerm {
+    properties {
+      name
+    }
+  }
+}
+
+query semanticSearch(
+  $types: [EntityType!]
+  $query: String!
+  $orFilters: [AndFilterInput!]
+  $count: Int!
+) {
+  semanticSearchAcrossEntities(
+    input: {
+      query: $query
+      count: $count
+      types: $types
+      orFilters: $orFilters
+      searchFlags: { skipHighlighting: true, maxAggValues: 5 }
+    }
+  ) {
+    count
+    total
+    searchResults {
+      entity {
+        ...SearchEntityInfo
+      }
+      # TODO: Consider adding this field for an enhanced semantic search experience:
+      # score               # Cosine similarity score (0-1) for semantic relevance
+    }
+    facets {
+      field
+      displayName
+      aggregations {
+        value
+        count
+        displayName
+        entity {
+          ...FacetEntityInfo
+        }
+      }
+    }
+    # TODO: Consider adding metadata section for search algorithm transparency:
+    # metadata {
+    #   scoringMethod       # e.g., "COSINE_SIMILARITY" vs "BM25"
+    # }
+  }
+}