Skip to content
This repository was archived by the owner on Jan 2, 2025. It is now read-only.

Commit 68f5844

Browse files
committed
update semantic/chunks endpoint to use snippet deduplication
1 parent d0db2e0 commit 68f5844

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

server/bleep/src/semantic.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -547,6 +547,7 @@ fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
547547
// - "novelty" or, the measure of how minimal the similarity is
548548
// to existing documents in the selection
549549
// The value of lambda skews the weightage in favor of either relevance or novelty.
550+
// - we add a language diversity factor to the score to encourage a range of languages in the results
550551
// k: the number of embeddings to select
551552
pub fn deduplicate_with_mmr(
552553
query_embedding: &[f32],
@@ -580,9 +581,9 @@ pub fn deduplicate_with_mmr(
580581
}
581582
let mut equation_score = lambda * first_part - (1. - lambda) * second_part;
582583

583-
// score is MMR + (1/4)^n where n is the number of times a language has been selected
584+
// MMR + (1/2)^n where n is the number of times a language has been selected
584585
let count = lang_counts.get(languages[i]).unwrap_or(&0);
585-
equation_score += 0.25_f32.powi(*count);
586+
equation_score += 0.5_f32.powi(*count);
586587

587588
if equation_score > best_score {
588589
best_score = equation_score;

0 commit comments

Comments
 (0)