Skip to content
This repository was archived by the owner on Apr 4, 2023. It is now read-only.

Commit 6fafc3e

Browse files
committed
Add unit test for prefix handling by the proximity criterion
1 parent 99b6532 commit 6fafc3e

File tree

1 file changed

+98
-1
lines changed

1 file changed

+98
-1
lines changed

milli/src/search/criteria/proximity.rs

Lines changed: 98 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -577,4 +577,101 @@ fn resolve_plane_sweep_candidates(
577577
}
578578

579579
#[cfg(test)]
580-
mod tests {}
580+
mod tests {
581+
use std::io::Cursor;
582+
583+
use big_s::S;
584+
585+
use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
586+
use crate::index::tests::TempIndex;
587+
use crate::SearchResult;
588+
589+
/// Test helper: builds 500 single-field documents for each entry of `prefixes`.
///
/// Each document is a JSON object whose only field, `"text"`, holds the prefix
/// immediately followed by the loop counter (`0..500`) written in lowercase
/// hexadecimal, e.g. `conf0` up to `conf1f3` for the prefix `conf`. Documents are
/// returned in generation order (all documents of the first prefix, then the next).
///
/// NOTE(review): judging by the name, the volume is presumably there so that each
/// prefix has many distinct word derivations in the index; confirm the exact
/// threshold against the indexer.
fn documents_with_enough_different_words_for_prefixes(prefixes: &[&str]) -> Vec<crate::Object> {
590+
let mut documents = Vec::new();
591+
for prefix in prefixes {
592+
for i in 0..500 {
593+
documents.push(
594+
serde_json::json!({
595+
// `{i:x}` renders the counter in lowercase hexadecimal, so every word is distinct.
"text": format!("{prefix}{i:x}"),
596+
})
597+
.as_object()
598+
// The `json!` literal above is always an object, so `as_object` cannot return `None`.
.unwrap()
599+
.clone(),
600+
)
601+
}
602+
}
603+
documents
604+
}
605+
606+
// Exercises prefix handling with the `words`, `typo` and `proximity` criteria: the same
// corpus is queried with `zero c`, `zero co`, `zero con`, `zero conf` and `zero config`,
// and the order of the returned document ids is pinned with inline snapshots.
#[test]
607+
fn test_proximity_criterion_prefix_handling() {
608+
let mut index = TempIndex::new();
609+
// The documents added below only carry a `text` field, hence the autogenerated docids.
index.index_documents_config.autogenerate_docids = true;
610+
611+
index
612+
.update_settings(|settings| {
613+
settings.set_primary_key(S("id"));
614+
settings.set_criteria(vec![
615+
"words".to_owned(),
616+
"typo".to_owned(),
617+
"proximity".to_owned(),
618+
]);
619+
})
620+
.unwrap();
621+
622+
let mut documents = DocumentsBatchBuilder::new(Vec::new());
623+
624+
// Hand-written documents; the `// n` labels are the ids that appear in the snapshots below.
for doc in [
625+
// 0
626+
serde_json::json!({ "text": "zero is exactly the amount of configuration I want" }),
627+
// 1
628+
serde_json::json!({ "text": "zero bad configuration" }),
629+
// 2
630+
serde_json::json!({ "text": "zero configuration" }),
631+
// 3
632+
serde_json::json!({ "text": "zero config" }),
633+
// 4
634+
serde_json::json!({ "text": "zero conf" }),
635+
// 5
636+
serde_json::json!({ "text": "zero bad conf" }),
637+
] {
638+
documents.append_json_object(doc.as_object().unwrap()).unwrap();
639+
}
640+
// Add 500 filler documents whose single word starts with `conf`.
for doc in documents_with_enough_different_words_for_prefixes(&["conf"]) {
641+
documents.append_json_object(&doc).unwrap();
642+
}
643+
let documents =
644+
DocumentsBatchReader::from_reader(Cursor::new(documents.into_inner().unwrap()))
645+
.unwrap();
646+
647+
index.add_documents(documents).unwrap();
648+
649+
let rtxn = index.read_txn().unwrap();
650+
651+
let SearchResult { matching_words: _, candidates: _, documents_ids } =
652+
index.search(&rtxn).query("zero c").execute().unwrap();
653+
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 4, 1, 5, 0]");
654+
655+
let SearchResult { matching_words: _, candidates: _, documents_ids } =
656+
index.search(&rtxn).query("zero co").execute().unwrap();
657+
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 4, 1, 5, 0]");
658+
659+
let SearchResult { matching_words: _, candidates: _, documents_ids } =
660+
index.search(&rtxn).query("zero con").execute().unwrap();
661+
// Here search results are degraded because `con` is in the prefix cache but it is too
662+
// long to be stored in the prefix proximity databases, and we don't want to iterate over
663+
// all of its word derivations
664+
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3, 4, 5]");
665+
666+
let SearchResult { matching_words: _, candidates: _, documents_ids } =
667+
index.search(&rtxn).query("zero conf").execute().unwrap();
668+
// Here search results are degraded as well, but we can still rank correctly documents
669+
// that contain `conf` exactly, and not as a prefix.
670+
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5, 0, 1, 2, 3]");
671+
672+
let SearchResult { matching_words: _, candidates: _, documents_ids } =
673+
index.search(&rtxn).query("zero config").execute().unwrap();
674+
// `config` is not a common prefix, so the normal methods are used
675+
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 1, 0, 4, 5]");
676+
}
677+
}

0 commit comments

Comments
 (0)