@@ -577,4 +577,101 @@ fn resolve_plane_sweep_candidates(
577577}
578578
579579#[ cfg( test) ]
580- mod tests { }
580+ mod tests {
581+ use std:: io:: Cursor ;
582+
583+ use big_s:: S ;
584+
585+ use crate :: documents:: { DocumentsBatchBuilder , DocumentsBatchReader } ;
586+ use crate :: index:: tests:: TempIndex ;
587+ use crate :: SearchResult ;
588+
589+ fn documents_with_enough_different_words_for_prefixes ( prefixes : & [ & str ] ) -> Vec < crate :: Object > {
590+ let mut documents = Vec :: new ( ) ;
591+ for prefix in prefixes {
592+ for i in 0 ..500 {
593+ documents. push (
594+ serde_json:: json!( {
595+ "text" : format!( "{prefix}{i:x}" ) ,
596+ } )
597+ . as_object ( )
598+ . unwrap ( )
599+ . clone ( ) ,
600+ )
601+ }
602+ }
603+ documents
604+ }
605+
/// Checks the order of the returned document ids for queries whose last word
/// is a progressively longer prefix of `configuration` (`c`, `co`, `con`,
/// `conf`, `config`), with the ranking criteria set to `words`, `typo` and
/// `proximity`.
#[test]
fn test_proximity_criterion_prefix_handling() {
    let mut index = TempIndex::new();
    index.index_documents_config.autogenerate_docids = true;

    index
        .update_settings(|settings| {
            settings.set_primary_key(S("id"));
            let criteria: Vec<String> =
                ["words", "typo", "proximity"].iter().map(|name| (*name).to_owned()).collect();
            settings.set_criteria(criteria);
        })
        .unwrap();

    let mut builder = DocumentsBatchBuilder::new(Vec::new());

    // The position of each text in this array is the document number used in
    // the snapshots below.
    let texts = [
        "zero is exactly the amount of configuration I want", // 0
        "zero bad configuration",                             // 1
        "zero configuration",                                 // 2
        "zero config",                                        // 3
        "zero conf",                                          // 4
        "zero bad conf",                                      // 5
    ];
    for text in texts {
        let doc = serde_json::json!({ "text": text });
        builder.append_json_object(doc.as_object().unwrap()).unwrap();
    }
    // Filler documents containing many different words starting with `conf`.
    for filler in documents_with_enough_different_words_for_prefixes(&["conf"]) {
        builder.append_json_object(&filler).unwrap();
    }

    let payload = builder.into_inner().unwrap();
    let reader = DocumentsBatchReader::from_reader(Cursor::new(payload)).unwrap();
    index.add_documents(reader).unwrap();

    let rtxn = index.read_txn().unwrap();

    // Runs a search and renders the ranked document ids with `Debug`
    // formatting, which is the representation the inline snapshots compare
    // against.
    let ranked_ids = |query: &'static str| {
        let SearchResult { documents_ids, .. } =
            index.search(&rtxn).query(query).execute().unwrap();
        format!("{documents_ids:?}")
    };

    insta::assert_snapshot!(ranked_ids("zero c"), @"[2, 3, 4, 1, 5, 0]");

    insta::assert_snapshot!(ranked_ids("zero co"), @"[2, 3, 4, 1, 5, 0]");

    // Here search results are degraded because `con` is in the prefix cache but it is too
    // long to be stored in the prefix proximity databases, and we don't want to iterate over
    // all of its word derivations.
    insta::assert_snapshot!(ranked_ids("zero con"), @"[0, 1, 2, 3, 4, 5]");

    // Here search results are degraded as well, but we can still correctly rank the
    // documents that contain `conf` exactly, and not as a prefix.
    insta::assert_snapshot!(ranked_ids("zero conf"), @"[4, 5, 0, 1, 2, 3]");

    // `config` is not a common prefix, so the normal methods are used.
    insta::assert_snapshot!(ranked_ids("zero config"), @"[2, 3, 1, 0, 4, 5]");
}
677+ }
0 commit comments