Skip to content

Virtual Nodes System #20

@tfius

Description

@tfius

Parent: #10 (EPIC: Transform DataCortex into Context Engine)
Priority: LOW | Phase: 4 - Multimodal Integration | Complexity: Medium
Depends on: #12 (External Links), #17 (Connectors Architecture)

What to Implement

Create a system for representing external entities as "virtual nodes" in the graph without creating local markdown files — enabling seamless integration of external data with local knowledge.

Features

  1. Virtual node storage (database, not files)
  2. Virtual node types for each connector
  3. Unified graph queries (local + virtual)
  4. Virtual node rendering in UI
  5. Linking between local and virtual nodes

How to Implement

Step 1: Virtual Node Storage

-- External entities represented as graph nodes. Rows live in the database
-- instead of local markdown files; one row per (source, source_id) pair.
CREATE TABLE virtual_nodes (
    -- SQLite accepts NULL in a non-INTEGER PRIMARY KEY column unless NOT NULL
    -- is declared, so the constraint is spelled out explicitly.
    id TEXT PRIMARY KEY NOT NULL,  -- Node id in "source:id" form
    source TEXT NOT NULL,          -- github, slack, etc.
    source_id TEXT NOT NULL,       -- ID in source system
    node_type TEXT NOT NULL,       -- github_issue, slack_message
    title TEXT NOT NULL,
    content TEXT,                  -- Optional body text
    url TEXT,                      -- Optional link to the entity in its source system
    author TEXT,
    metadata JSON,                 -- Optional; may be NULL, readers must handle that
    embedding BLOB,                -- Cached embedding
    created_at TEXT,               -- Timestamps are stored as text; the exact
    updated_at TEXT,               -- format is not fixed by this schema
    synced_at TEXT,                -- (presumably ISO-8601 - confirm with the writer)
    UNIQUE(source, source_id)      -- An external entity maps to at most one node
);

-- Edges between graph nodes. Either endpoint may be a virtual node id or a
-- local node id; no foreign key is declared on the endpoints.
CREATE TABLE virtual_edges (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    source_node TEXT NOT NULL,     -- Edge origin: virtual or local node id
    target_node TEXT NOT NULL,     -- Edge destination: virtual or local node id
    edge_type TEXT,                -- references, mentions, created_by (nullable)
    metadata JSON,                 -- Optional extra data about the edge
    created_at TEXT                -- Stored as text; format not fixed by this schema
);

-- Secondary indexes for filtering virtual nodes by source and by node type.
-- NOTE(review): in SQLite the UNIQUE(source, source_id) constraint on
-- virtual_nodes is backed by an index whose leading column is source, so
-- idx_virtual_nodes_source may be redundant - confirm with EXPLAIN QUERY PLAN.
CREATE INDEX idx_virtual_nodes_source ON virtual_nodes(source);
CREATE INDEX idx_virtual_nodes_type ON virtual_nodes(node_type);

Step 2: Unified Graph Builder

# src/datacortex/indexer/graph_builder.py
def build_unified_graph(
    config: DatacortexConfig,
    include_local: bool = True,
    include_virtual: bool = True,
    sources: list[str] = None
) -> Graph:
    """Build a graph that merges local nodes with virtual (external) nodes.

    Args:
        config: Passed to ``build_local_graph`` and ``load_virtual_nodes``.
        include_local: Include the nodes and edges from ``build_local_graph``.
        include_virtual: Include the nodes and edges from
            ``load_virtual_nodes`` plus any local-to-virtual cross-links.
        sources: Forwarded unchanged to ``load_virtual_nodes``. ``None`` is
            the default (presumably "no source filter" - confirm against
            the loader).

    Returns:
        A ``Graph`` holding the combined nodes and edges. With
        ``include_local=False`` there are no local nodes to link from, so no
        cross-link edges are produced.
    """
    nodes = []
    edges = []
    # Bound up front: the cross-link step below reads this name even when
    # include_local is False, which previously raised NameError.
    local_nodes = []

    if include_local:
        local_nodes, local_edges = build_local_graph(config)
        nodes.extend(local_nodes)
        edges.extend(local_edges)

    if include_virtual:
        virtual_nodes, virtual_edges = load_virtual_nodes(config, sources)
        nodes.extend(virtual_nodes)
        edges.extend(virtual_edges)

        # Add cross-links between local and virtual nodes; with no local
        # nodes this contributes nothing.
        edges.extend(find_cross_links(local_nodes, virtual_nodes))

    return Graph(nodes=nodes, edges=edges)

def find_cross_links(local_nodes: list[Node], virtual_nodes: list[Node]) -> list[Edge]:
    """Find 'references' edges from local nodes to virtual nodes they cite.

    A local node references a virtual node when the virtual node's
    ``metadata['url']`` occurs as a substring of the local node's content.

    Args:
        local_nodes: Nodes whose ``content`` is scanned. Nodes with empty or
            missing content are skipped.
        virtual_nodes: Candidate targets. Nodes whose ``metadata`` is None or
            has no non-empty ``'url'`` entry can never match and are ignored.

    Returns:
        One ``Edge(source=local.id, target=virtual.id, label='references')``
        per match, ordered by local node and then by virtual node.
    """
    # Resolve every virtual URL once instead of once per local node, and
    # tolerate metadata being None (the column is nullable).
    linkable = []
    for virtual in virtual_nodes:
        url = (virtual.metadata or {}).get('url')
        if url:
            linkable.append((virtual.id, url))

    edges = []
    for local in local_nodes:
        text = local.content
        if not text:
            continue
        for virtual_id, url in linkable:
            if url in text:
                edges.append(Edge(source=local.id, target=virtual_id, label='references'))
    return edges

Step 3: Virtual Node Service

# src/datacortex/connectors/virtual_nodes.py
class VirtualNodeService:
    """Service-layer skeleton for virtual nodes kept in a database.

    Only the node-id scheme is implemented here. The persistence and query
    methods are placeholders: they perform no work and return None.
    """

    def __init__(self, db_path: str):
        # The path is only remembered; nothing in this class opens it yet.
        self.db_path = db_path

    def upsert_node(self, entity: ExternalEntity) -> str:
        """Return the virtual node id for ``entity`` in "source:id" form.

        Intended to create or update the matching row as well; that write
        is not implemented yet.
        """
        # TODO: INSERT OR REPLACE into virtual_nodes
        return f"{entity.source}:{entity.id}"

    def add_edge(self, source_node: str, target_node: str, edge_type: str) -> None:
        """Record an edge between two nodes, each local or virtual.

        Placeholder: nothing is stored yet.
        """
        return None

    def get_all_nodes(self, sources: list[str] = None) -> list[Node]:
        """Fetch every virtual node, optionally restricted to ``sources``.

        Placeholder: currently returns None rather than a list.
        """
        return None

    def search_virtual(self, query: str, limit: int = 20) -> list[Node]:
        """Search virtual nodes by title/content, up to ``limit`` results.

        Placeholder: currently returns None rather than a list.
        """
        return None

Step 4: Update Frontend

// frontend/js/graph.js
/**
 * Pick the drawing style for a graph node.
 *
 * An id containing ':' is treated as a virtual node ("source:id") and drawn
 * as a dashed rectangle colored by its source; any other node is drawn as a
 * solid circle colored by its type.
 *
 * @param {{id: string, type?: string}} node
 * @returns {{shape: string, color: string, strokeDash: string}}
 */
function getNodeStyle(node) {
    const colonAt = node.id.indexOf(':');

    // No colon in the id: local node.
    if (colonAt === -1) {
        const localColor = TYPE_COLORS[node.type] || '#666';
        return { shape: 'circle', color: localColor, strokeDash: 'none' };
    }

    // Everything before the first colon names the source system.
    const origin = node.id.slice(0, colonAt);
    const virtualColor = SOURCE_COLORS[origin] || '#888';
    return { shape: 'rect', color: virtualColor, strokeDash: '5,5' };
}

// Color for virtual nodes, keyed by the source prefix of the node id
// (the part before ':'). Sources not listed here fall back to the default
// chosen in getNodeStyle.
const SOURCE_COLORS = {
    'github': '#6366f1',
    'slack': '#36c5f0',
    'linear': '#5e6ad2',
};

/**
 * Handle a click on a graph node.
 *
 * A node that carries a url is opened in a new browser tab; any other node
 * is opened in the editor by its path.
 *
 * @param {{url?: string, path?: string}} node
 */
function onNodeClick(node) {
    if (node.url) {
        // The URL originates from an external system, so the opened page must
        // not receive a window.opener handle back to this app (reverse
        // tabnabbing) and should not be sent this page as referrer.
        window.open(node.url, '_blank', 'noopener,noreferrer');
    } else {
        openInEditor(node.path);
    }
}

Step 5: API Updates

@router.get("/graph")
async def get_graph(
    include_local: bool = True,
    include_virtual: bool = True,
    sources: str = None  # comma-separated
):
    """Return the unified graph of local and virtual nodes.

    Args:
        include_local: Forwarded to ``build_unified_graph``.
        include_virtual: Forwarded to ``build_unified_graph``.
        sources: Comma-separated source names, e.g. ``"github,slack"``.
            Whitespace around names and empty entries are ignored. When the
            parameter is missing, empty, or contains only separators, no
            source list is passed on (``None``).

    Returns:
        The graph serialized with ``model_dump()``.
    """
    # Normalize "github, slack," to ['github', 'slack'] so that stray spaces
    # or trailing commas do not turn into source names that match nothing.
    parts = [part.strip() for part in sources.split(',')] if sources else []
    source_list = [name for name in parts if name] or None
    graph = build_unified_graph(
        config,
        include_local=include_local,
        include_virtual=include_virtual,
        sources=source_list
    )
    return graph.model_dump()

Acceptance Criteria

  • Virtual nodes stored in database
  • Unified graph includes both local and virtual nodes
  • Cross-links discovered wherever local node content references a virtual node's URL
  • Virtual nodes searchable
  • Frontend distinguishes virtual nodes visually
  • Clicking a virtual node opens its external URL

Metadata

Metadata

Assignees

No one assigned

    Labels

    phase-4 — Phase 4: Multimodal Integration

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions