Skip to content

Commit 3a341ac

Browse files
committed
feat: implement document graph functionality for cross-document relationships
- Add DocumentGraph and related types (DocumentGraphNode, GraphEdge, etc.)
- Create graph module with builder, config, and type definitions
- Move graph builder from src/index to src/graph module
- Implement automatic graph rebuilding after document indexing
- Add graph-aware retrieval with boost factor support
- Integrate document graph into query pipeline context
- Add graph configuration options to main Config struct
- Implement keyword extraction from ReasoningIndex for graph building
- Add workspace methods to persist/load document graph
- Update retrieval strategies to utilize graph connections
- Add validation for graph configuration parameters
1 parent 6efb9b2 commit 3a341ac

File tree

16 files changed

+548
-367
lines changed

16 files changed

+548
-367
lines changed

rust/src/client/engine.rs

Lines changed: 69 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
//! # }
3636
//! ```
3737
38+
use std::collections::HashMap;
3839
use std::sync::Arc;
3940

4041
use futures::StreamExt;
@@ -45,7 +46,7 @@ use crate::error::Result;
4546
use crate::index::PipelineOptions;
4647
use crate::index::incremental::{self, IndexAction};
4748
use crate::retrieval::{PipelineRetriever, RetrieveEventReceiver};
48-
use crate::storage::Workspace;
49+
use crate::storage::{PersistedDocument, Workspace};
4950
use crate::utils::fingerprint::Fingerprint;
5051
use crate::{DocumentTree, Error};
5152

@@ -166,6 +167,11 @@ impl Engine {
166167
failed.len()
167168
)));
168169
}
170+
if !items.is_empty() {
171+
if let Err(e) = self.rebuild_graph().await {
172+
tracing::warn!("Graph rebuild failed: {}", e);
173+
}
174+
}
169175
return Ok(IndexResult::with_partial(items, failed));
170176
}
171177

@@ -200,6 +206,13 @@ impl Engine {
200206
)));
201207
}
202208

209+
// Rebuild document graph after successful batch index
210+
if !items.is_empty() {
211+
if let Err(e) = self.rebuild_graph().await {
212+
tracing::warn!("Graph rebuild failed: {}", e);
213+
}
214+
}
215+
203216
Ok(IndexResult::with_partial(items, failed))
204217
}
205218

@@ -345,7 +358,16 @@ impl Engine {
345358
/// ```
346359
pub async fn query(&self, ctx: QueryContext) -> Result<QueryResult> {
347360
let doc_ids = self.resolve_scope(&ctx.scope).await?;
348-
let options = ctx.to_retrieve_options(&self.config);
361+
let mut options = ctx.to_retrieve_options(&self.config);
362+
363+
// Load document graph for graph-aware retrieval (if enabled)
364+
if self.config.graph.enabled {
365+
if let Some(ref workspace) = self.workspace {
366+
if let Ok(Some(graph)) = workspace.get_graph().await {
367+
options = options.with_document_graph(Arc::new(graph));
368+
}
369+
}
370+
}
349371

350372
let mut items = Vec::with_capacity(doc_ids.len());
351373
let mut failed = Vec::new();
@@ -508,6 +530,51 @@ impl Engine {
508530
}
509531
}
510532

533+
/// Rebuild the document graph after indexing, if graph is enabled.
534+
async fn rebuild_graph(&self) -> Result<()> {
535+
if !self.config.graph.enabled {
536+
return Ok(());
537+
}
538+
let workspace = match self.workspace {
539+
Some(ref ws) => ws,
540+
None => return Ok(()),
541+
};
542+
543+
// Load all documents and extract keyword profiles
544+
let doc_ids = workspace.inner().list_documents().await;
545+
let mut builder = crate::graph::DocumentGraphBuilder::new(self.config.graph.clone());
546+
547+
for doc_id in &doc_ids {
548+
if let Some(doc) = workspace.load(doc_id).await? {
549+
let keywords = Self::extract_keywords_from_doc(&doc);
550+
builder.add_document(
551+
&doc.meta.id,
552+
&doc.meta.name,
553+
&doc.meta.format,
554+
doc.meta.node_count,
555+
keywords,
556+
);
557+
}
558+
}
559+
560+
let graph = builder.build();
561+
workspace.set_graph(&graph).await?;
562+
Ok(())
563+
}
564+
565+
/// Extract keyword → weight map from a persisted document's ReasoningIndex.
566+
fn extract_keywords_from_doc(doc: &PersistedDocument) -> HashMap<String, f32> {
567+
let mut keywords = HashMap::new();
568+
if let Some(ref ri) = doc.reasoning_index {
569+
for (kw, entries) in ri.all_topic_entries() {
570+
let weight: f32 =
571+
entries.iter().map(|e| e.weight).sum::<f32>() / entries.len().max(1) as f32;
572+
keywords.insert(kw.clone(), weight);
573+
}
574+
}
575+
keywords
576+
}
577+
511578
/// Resolve what action to take for a source.
512579
async fn resolve_index_action(
513580
&self,

rust/src/client/workspace.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,16 @@ impl WorkspaceClient {
299299
pub async fn find_by_source_path(&self, path: &std::path::Path) -> Option<String> {
300300
self.workspace.find_by_source_path(path).await
301301
}
302+
303+
/// Get the document graph, loading from backend if not cached.
304+
pub async fn get_graph(&self) -> Result<Option<crate::graph::DocumentGraph>> {
305+
self.workspace.get_graph().await
306+
}
307+
308+
/// Persist the document graph to the backend.
309+
pub async fn set_graph(&self, graph: &crate::graph::DocumentGraph) -> Result<()> {
310+
self.workspace.set_graph(graph).await
311+
}
302312
}
303313

304314
/// Workspace statistics.

rust/src/config/types/mod.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,10 @@ pub struct Config {
6868
#[serde(default)]
6969
pub concurrency: ConcurrencyConfig,
7070

71+
/// Document graph configuration.
72+
#[serde(default)]
73+
pub graph: crate::graph::DocumentGraphConfig,
74+
7175
/// Fallback/error recovery configuration (legacy, prefer llm.fallback).
7276
#[serde(default)]
7377
pub fallback: FallbackConfig,
@@ -83,6 +87,7 @@ impl Default for Config {
8387
retrieval: RetrievalConfig::default(),
8488
storage: StorageConfig::default(),
8589
concurrency: ConcurrencyConfig::default(),
90+
graph: crate::graph::DocumentGraphConfig::default(),
8691
fallback: FallbackConfig::default(),
8792
}
8893
}
@@ -136,6 +141,12 @@ impl Config {
136141
self
137142
}
138143

144+
/// Set the document graph configuration.
145+
pub fn with_graph(mut self, graph: crate::graph::DocumentGraphConfig) -> Self {
146+
self.graph = graph;
147+
self
148+
}
149+
139150
/// Set the fallback configuration.
140151
pub fn with_fallback(mut self, fallback: FallbackConfig) -> Self {
141152
self.fallback = fallback;
@@ -209,6 +220,20 @@ impl Config {
209220
));
210221
}
211222

223+
// Validate graph
224+
if self.graph.min_keyword_jaccard < 0.0 || self.graph.min_keyword_jaccard > 1.0 {
225+
errors.push(ValidationError::error(
226+
"graph.min_keyword_jaccard",
227+
"Must be between 0.0 and 1.0",
228+
));
229+
}
230+
if self.graph.max_edges_per_node == 0 {
231+
errors.push(ValidationError::error(
232+
"graph.max_edges_per_node",
233+
"Must be greater than 0",
234+
));
235+
}
236+
212237
// Validate fallback
213238
if self.fallback.enabled && self.fallback.models.is_empty() {
214239
errors.push(ValidationError::warning(

0 commit comments

Comments
 (0)