Skip to content
Merged

Dev #62

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion rust/src/index/stages/reasoning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use crate::document::{
TopicEntry,
};
use crate::error::Result;
use crate::retrieval::search::extract_keywords;
use crate::retrieval::scoring::extract_keywords;

use super::async_trait;
use super::{AccessPattern, IndexStage, StageResult};
Expand Down
2 changes: 1 addition & 1 deletion rust/src/retrieval/content/scorer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
use std::collections::HashMap;

use crate::document::NodeId;
use crate::retrieval::search::{Bm25Params, STOPWORDS, extract_keywords};
use crate::retrieval::scoring::{Bm25Params, STOPWORDS, extract_keywords};
use crate::utils::estimate_tokens;

use super::config::ScoringStrategyConfig;
Expand Down
1 change: 1 addition & 0 deletions rust/src/retrieval/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ pub mod complexity;
pub mod content;
pub mod pilot;
pub mod pipeline;
pub mod scoring;
pub mod search;
pub mod stages;
pub mod strategy;
Expand Down
70 changes: 51 additions & 19 deletions rust/src/retrieval/pilot/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ impl ContextBuilder {
ctx.estimated_tokens += self.estimate_tokens(&ctx.query_section);

// Build path section
ctx.path_section = self.build_path_section(state.tree, state.path);
ctx.path_section = self.build_path_section(state.tree, state.path, state.step_reasons);
ctx.estimated_tokens += self.estimate_tokens(&ctx.path_section);

// Build candidates section
Expand Down Expand Up @@ -439,7 +439,7 @@ impl ContextBuilder {
// Show failed path
ctx.path_section = format!(
"Failed path:\n{}",
self.build_path_section(state.tree, failed_path)
self.build_path_section(state.tree, failed_path, None)
);
ctx.estimated_tokens += self.estimate_tokens(&ctx.path_section);

Expand All @@ -463,35 +463,67 @@ impl ContextBuilder {
format!("User Query:\n{}\n", truncated)
}

/// Build current path section.
fn build_path_section(&self, tree: &DocumentTree, path: &[NodeId]) -> String {
/// Build current path section with optional per-step reasoning.
fn build_path_section(
&self,
tree: &DocumentTree,
path: &[NodeId],
step_reasons: Option<&[Option<String>]>,
) -> String {
if path.is_empty() {
return "Current Position: Root\n".to_string();
}

let mut result = String::from("Current Path:\n");
result.push_str("Root");
let has_reasons = step_reasons
.map(|r| r.iter().any(|x| x.is_some()))
.unwrap_or(false);

// Limit depth shown
let max_depth = self.effective_max_path_depth();
let start = if path.len() > max_depth {
path.len() - max_depth
} else {
0
};
if !has_reasons {
// Original breadcrumb format when no reasoning available
let mut result = String::from("Current Path:\n");
result.push_str("Root");

let max_depth = self.effective_max_path_depth();
let start = if path.len() > max_depth {
path.len() - max_depth
} else {
0
};

if start > 0 {
result.push_str(" → ...");
if start > 0 {
result.push_str(" → ...");
}

for node_id in path.iter().skip(start) {
if let Some(node) = tree.get(*node_id) {
result.push_str(" → ");
result.push_str(&node.title);
}
}

result.push('\n');
return result;
}

for node_id in path.iter().skip(start) {
// Enhanced format with per-step reasoning
let mut result = String::from("Navigation History:\n");
let reasons = step_reasons.unwrap();

for (i, node_id) in path.iter().enumerate() {
if let Some(node) = tree.get(*node_id) {
result.push_str(" → ");
result.push_str(&node.title);
let reason = reasons
.get(i)
.and_then(|r| r.as_deref())
.unwrap_or("(automatic selection)");
result.push_str(&format!(
" Step {}: {} — because: {}\n",
i + 1,
node.title,
reason
));
}
}

result.push('\n');
result
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,18 +86,54 @@ pub async fn score_candidates(
visited: &HashSet<NodeId>,
pilot_weight: f32,
cache: Option<&PilotDecisionCache>,
step_reasons: Option<&[Option<String>]>,
) -> Vec<(NodeId, f32)> {
let scored = score_candidates_detailed(
tree, candidates, query, pilot, path, visited, pilot_weight, cache, step_reasons,
)
.await;
scored.into_iter().map(|s| (s.node_id, s.score)).collect()
}

/// A scored candidate with optional reasoning from the Pilot.
///
/// This is the element type returned by `score_candidates_detailed`.
/// `score_candidates` reduces each entry to a `(node_id, score)` pair and
/// discards `reason`.
#[derive(Debug, Clone)]
pub struct ScoredCandidate {
/// The node ID.
pub node_id: NodeId,
/// Relevance score (0.0 - 1.0).
///
/// Either the NodeScorer score on its own, or a weighted blend of the
/// NodeScorer score and the Pilot's score when the Pilot ranked this node.
pub score: f32,
/// Reason the Pilot chose this node, if available.
///
/// `None` when scoring fell back to NodeScorer alone, when the Pilot did
/// not rank this node, or when the Pilot ranked it without giving a reason.
pub reason: Option<String>,
}

/// Score child candidates and return detailed results with reasons.
///
/// Like [`score_candidates`] but preserves per-candidate reasoning
/// from the Pilot. Use this when the search algorithm needs to
/// record why each path step was taken (e.g., for beam search
/// reasoning history).
pub async fn score_candidates_detailed(
tree: &DocumentTree,
candidates: &[NodeId],
query: &str,
pilot: Option<&dyn Pilot>,
path: &[NodeId],
visited: &HashSet<NodeId>,
pilot_weight: f32,
cache: Option<&PilotDecisionCache>,
step_reasons: Option<&[Option<String>]>,
) -> Vec<ScoredCandidate> {
if candidates.is_empty() {
return Vec::new();
}

// If no Pilot, pure NodeScorer
// If no Pilot, pure NodeScorer (no reasons available)
let Some(p) = pilot else {
return score_with_scorer(tree, candidates, query);
return score_with_scorer_detailed(tree, candidates, query);
};

if !p.is_active() {
return score_with_scorer(tree, candidates, query);
return score_with_scorer_detailed(tree, candidates, query);
}

// Determine parent node (last in path) for cache key
Expand All @@ -109,20 +145,22 @@ pub async fn score_candidates(
tracing::trace!("Pilot cache hit for parent={:?}", parent);
cached
} else {
let state = SearchState::new(tree, query, path, candidates, visited);
let mut state = SearchState::new(tree, query, path, candidates, visited);
state.step_reasons = step_reasons;
let d = p.decide(&state).await;
c.put(query, parent, &d).await;
d
}
} else {
let state = SearchState::new(tree, query, path, candidates, visited);
let mut state = SearchState::new(tree, query, path, candidates, visited);
state.step_reasons = step_reasons;
p.decide(&state).await
};

// Build Pilot score map
let mut pilot_scores: HashMap<NodeId, f32> = HashMap::new();
// Build Pilot score + reason map
let mut pilot_data: HashMap<NodeId, (f32, Option<String>)> = HashMap::new();
for ranked in &decision.ranked_candidates {
pilot_scores.insert(ranked.node_id, ranked.score);
pilot_data.insert(ranked.node_id, (ranked.score, ranked.reason.clone()));
}

// Compute NodeScorer fallback scores
Expand All @@ -132,24 +170,26 @@ pub async fn score_candidates(

let scorer = NodeScorer::new(ScoringContext::new(query));

let mut scored: Vec<(NodeId, f32)> = candidates
let mut scored: Vec<ScoredCandidate> = candidates
.iter()
.map(|&node_id| {
let algo_score = scorer.score(tree, node_id);
let p_score = pilot_scores.get(&node_id).copied().unwrap_or(0.0);
let (p_score, reason) = pilot_data.get(&node_id)
.map(|(s, r)| (*s, r.clone()))
.unwrap_or((0.0, None));

let final_score = if effective_pilot > 0.0 && pilot_scores.contains_key(&node_id) {
let final_score = if effective_pilot > 0.0 && pilot_data.contains_key(&node_id) {
(effective_pilot * p_score + scorer_weight * algo_score)
/ (effective_pilot + scorer_weight)
} else {
algo_score
};

(node_id, final_score)
ScoredCandidate { node_id, score: final_score, reason }
})
.collect();

scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
scored.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
scored
}

Expand All @@ -163,6 +203,20 @@ fn score_with_scorer(
scorer.score_and_sort(tree, candidates)
}

/// Rank candidates with NodeScorer alone and wrap them as detailed results.
///
/// Used when no Pilot contributes to scoring, so every returned
/// `ScoredCandidate` has `reason` set to `None`. The order is whatever
/// `NodeScorer::score_and_sort` produces.
fn score_with_scorer_detailed(
    tree: &DocumentTree,
    candidates: &[NodeId],
    query: &str,
) -> Vec<ScoredCandidate> {
    let ranked = NodeScorer::new(ScoringContext::new(query)).score_and_sort(tree, candidates);

    let mut detailed = Vec::new();
    for (node_id, score) in ranked {
        // No Pilot was consulted, so there is no reasoning to attach.
        detailed.push(ScoredCandidate {
            node_id,
            score,
            reason: None,
        });
    }
    detailed
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
5 changes: 4 additions & 1 deletion rust/src/retrieval/pilot/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ mod builder;
mod complexity;
mod config;
mod decision;
mod decision_scorer;
mod fallback;
mod feedback;
mod llm_pilot;
Expand All @@ -43,10 +44,12 @@ mod noop;
mod parser;
mod prompts;
mod r#trait;
mod scorer;

pub use complexity::detect_with_llm;
pub use config::PilotConfig;
pub use decision::{InterventionPoint, PilotDecision};

pub use decision_scorer::{PilotDecisionCache, ScoredCandidate, score_candidates, score_candidates_detailed};
pub use llm_pilot::LlmPilot;
pub use r#trait::{Pilot, SearchState};
pub use scorer::{NodeScorer, ScoringContext};
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ use std::collections::HashMap;

use crate::document::{DocumentTree, NodeId};

use super::bm25::Bm25Params;
use crate::retrieval::scoring::bm25::Bm25Params;

// Re-export extract_keywords for other modules to use
pub use super::bm25::extract_keywords;
pub use crate::retrieval::scoring::bm25::extract_keywords;

/// Scoring strategy to use.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
Expand Down
7 changes: 7 additions & 0 deletions rust/src/retrieval/pilot/trait.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ pub struct SearchState<'a> {
pub best_score: f32,
/// Whether the search is currently backtracking.
pub is_backtracking: bool,
/// Per-step reasoning for why each node in `path` was chosen.
///
/// Same length as `path` when present. `None` means no reasoning
/// history is available (e.g. first iteration, algorithm-only mode).
pub step_reasons: Option<&'a [Option<String>]>,
}

impl<'a> SearchState<'a> {
Expand All @@ -63,6 +68,7 @@ impl<'a> SearchState<'a> {
iteration: 0,
best_score: 0.0,
is_backtracking: false,
step_reasons: None,
}
}

Expand All @@ -78,6 +84,7 @@ impl<'a> SearchState<'a> {
iteration: 0,
best_score: 0.0,
is_backtracking: false,
step_reasons: None,
}
}

Expand Down
File renamed without changes.
12 changes: 12 additions & 0 deletions rust/src/retrieval/scoring/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Copyright (c) 2026 vectorless developers
// SPDX-License-Identifier: Apache-2.0

//! Scoring utilities for text relevance assessment.
//!
//! This module provides text scoring algorithms (BM25, keyword matching)
//! that are used across the retrieval pipeline. These are general-purpose
//! tools, not tied to any specific search algorithm.

pub mod bm25;

pub use bm25::{Bm25Engine, Bm25Params, FieldDocument, STOPWORDS, extract_keywords};
Loading