@@ -4,12 +4,15 @@ use std::collections::HashMap;
44use chrono:: Utc ;
55use fst:: IntoStreamer ;
66use heed:: types:: ByteSlice ;
7+ use heed:: { BytesDecode , BytesEncode } ;
78use roaring:: RoaringBitmap ;
89use serde_json:: Value ;
910
1011use super :: ClearDocuments ;
11- use crate :: error:: { InternalError , UserError } ;
12- use crate :: heed_codec:: facet:: FacetStringLevelZeroValueCodec ;
12+ use crate :: error:: { InternalError , SerializationError , UserError } ;
13+ use crate :: heed_codec:: facet:: {
14+ FacetLevelValueU32Codec , FacetStringLevelZeroValueCodec , FacetStringZeroBoundsValueCodec ,
15+ } ;
1316use crate :: heed_codec:: CboRoaringBitmapCodec ;
1417use crate :: index:: { db_name, main_key} ;
1518use crate :: { DocumentId , ExternalDocumentsIds , FieldId , Index , Result , SmallString32 , BEU32 } ;
@@ -451,26 +454,61 @@ where
451454 Ok ( ( ) )
452455}
453456
454- fn remove_docids_from_facet_field_id_string_docids < ' a , C > (
457+ fn remove_docids_from_facet_field_id_string_docids < ' a , C , D > (
455458 wtxn : & ' a mut heed:: RwTxn ,
456- db : & heed:: Database < C , FacetStringLevelZeroValueCodec < CboRoaringBitmapCodec > > ,
459+ db : & heed:: Database < C , D > ,
457460 to_remove : & RoaringBitmap ,
458- ) -> heed:: Result < ( ) >
459- where
460- C : heed:: BytesDecode < ' a > + heed:: BytesEncode < ' a > ,
461- {
462- let mut iter = db. remap_key_type :: < ByteSlice > ( ) . iter_mut ( wtxn) ?;
461+ ) -> crate :: Result < ( ) > {
462+ let db_name = Some ( crate :: index:: db_name:: FACET_ID_STRING_DOCIDS ) ;
463+ let mut iter = db. remap_types :: < ByteSlice , ByteSlice > ( ) . iter_mut ( wtxn) ?;
463464 while let Some ( result) = iter. next ( ) {
464- let ( bytes, ( original_value, mut docids) ) = result?;
465- let previous_len = docids. len ( ) ;
466- docids -= to_remove;
467- if docids. is_empty ( ) {
468- // safety: we don't keep references from inside the LMDB database.
469- unsafe { iter. del_current ( ) ? } ;
470- } else if docids. len ( ) != previous_len {
471- let bytes = bytes. to_owned ( ) ;
472- // safety: we don't keep references from inside the LMDB database.
473- unsafe { iter. put_current ( & bytes, & ( original_value, docids) ) ? } ;
465+ let ( key, val) = result?;
466+ match FacetLevelValueU32Codec :: bytes_decode ( key) {
467+ Some ( _) => {
468+ // If we are able to parse this key it means it is a facet string group
469+ // level key. We must then parse the value using the appropriate codec.
470+ let ( group, mut docids) =
471+ FacetStringZeroBoundsValueCodec :: < CboRoaringBitmapCodec > :: bytes_decode ( val)
472+ . ok_or_else ( || SerializationError :: Decoding { db_name } ) ?;
473+
474+ let previous_len = docids. len ( ) ;
475+ docids -= to_remove;
476+ if docids. is_empty ( ) {
477+ // safety: we don't keep references from inside the LMDB database.
478+ unsafe { iter. del_current ( ) ? } ;
479+ } else if docids. len ( ) != previous_len {
480+ let key = key. to_owned ( ) ;
481+ let val = & ( group, docids) ;
482+ let value_bytes =
483+ FacetStringZeroBoundsValueCodec :: < CboRoaringBitmapCodec > :: bytes_encode ( val)
484+ . ok_or_else ( || SerializationError :: Encoding { db_name } ) ?;
485+
486+ // safety: we don't keep references from inside the LMDB database.
487+ unsafe { iter. put_current ( & key, & value_bytes) ? } ;
488+ }
489+ }
490+ None => {
491+ // The key corresponds to a level zero facet string.
492+ let ( original_value, mut docids) =
493+ FacetStringLevelZeroValueCodec :: < CboRoaringBitmapCodec > :: bytes_decode ( val)
494+ . ok_or_else ( || SerializationError :: Decoding { db_name } ) ?;
495+
496+ let previous_len = docids. len ( ) ;
497+ docids -= to_remove;
498+ if docids. is_empty ( ) {
499+ // safety: we don't keep references from inside the LMDB database.
500+ unsafe { iter. del_current ( ) ? } ;
501+ } else if docids. len ( ) != previous_len {
502+ let key = key. to_owned ( ) ;
503+ let val = & ( original_value, docids) ;
504+ let value_bytes =
505+ FacetStringLevelZeroValueCodec :: < CboRoaringBitmapCodec > :: bytes_encode ( val)
506+ . ok_or_else ( || SerializationError :: Encoding { db_name } ) ?;
507+
508+ // safety: we don't keep references from inside the LMDB database.
509+ unsafe { iter. put_current ( & key, & value_bytes) ? } ;
510+ }
511+ }
474512 }
475513 }
476514
@@ -505,10 +543,13 @@ where
505543
506544#[ cfg( test) ]
507545mod tests {
546+ use big_s:: S ;
508547 use heed:: EnvOpenOptions ;
548+ use maplit:: hashset;
509549
510550 use super :: * ;
511- use crate :: update:: { IndexDocuments , UpdateFormat } ;
551+ use crate :: update:: { IndexDocuments , Settings , UpdateFormat } ;
552+ use crate :: FilterCondition ;
512553
513554 #[ test]
514555 fn delete_documents_with_numbers_as_primary_key ( ) {
@@ -566,4 +607,55 @@ mod tests {
566607
567608 wtxn. commit ( ) . unwrap ( ) ;
568609 }
610+
611+ #[ test]
612+ fn delete_documents_with_filterable_attributes ( ) {
613+ let path = tempfile:: tempdir ( ) . unwrap ( ) ;
614+ let mut options = EnvOpenOptions :: new ( ) ;
615+ options. map_size ( 10 * 1024 * 1024 ) ; // 10 MB
616+ let index = Index :: new ( options, & path) . unwrap ( ) ;
617+
618+ let mut wtxn = index. write_txn ( ) . unwrap ( ) ;
619+ let mut builder = Settings :: new ( & mut wtxn, & index, 0 ) ;
620+ builder. set_primary_key ( S ( "docid" ) ) ;
621+ builder. set_filterable_fields ( hashset ! { S ( "label" ) } ) ;
622+ builder. execute ( |_, _| ( ) ) . unwrap ( ) ;
623+
624+ let content = & br#"[
625+ {"docid":"1_4","label":"sign"},
626+ {"docid":"1_5","label":"letter"},
627+ {"docid":"1_7","label":"abstract,cartoon,design,pattern"},
628+ {"docid":"1_36","label":"drawing,painting,pattern"},
629+ {"docid":"1_37","label":"art,drawing,outdoor"},
630+ {"docid":"1_38","label":"aquarium,art,drawing"},
631+ {"docid":"1_39","label":"abstract"},
632+ {"docid":"1_40","label":"cartoon"},
633+ {"docid":"1_41","label":"art,drawing"},
634+ {"docid":"1_42","label":"art,pattern"},
635+ {"docid":"1_43","label":"abstract,art,drawing,pattern"},
636+ {"docid":"1_44","label":"drawing"},
637+ {"docid":"1_45","label":"art"},
638+ {"docid":"1_46","label":"abstract,colorfulness,pattern"},
639+ {"docid":"1_47","label":"abstract,pattern"},
640+ {"docid":"1_52","label":"abstract,cartoon"},
641+ {"docid":"1_57","label":"abstract,drawing,pattern"},
642+ {"docid":"1_58","label":"abstract,art,cartoon"},
643+ {"docid":"1_68","label":"design"},
644+ {"docid":"1_69","label":"geometry"}
645+ ]"# [ ..] ;
646+ let mut builder = IndexDocuments :: new ( & mut wtxn, & index, 0 ) ;
647+ builder. update_format ( UpdateFormat :: Json ) ;
648+ builder. execute ( content, |_, _| ( ) ) . unwrap ( ) ;
649+
650+ // Delete not all of the documents but some of them.
651+ let mut builder = DeleteDocuments :: new ( & mut wtxn, & index, 1 ) . unwrap ( ) ;
652+ builder. delete_external_id ( "1_4" ) ;
653+ builder. execute ( ) . unwrap ( ) ;
654+
655+ let filter = FilterCondition :: from_str ( & wtxn, & index, "label = sign" ) . unwrap ( ) ;
656+ let results = index. search ( & wtxn) . filter ( filter) . execute ( ) . unwrap ( ) ;
657+ assert ! ( results. documents_ids. is_empty( ) ) ;
658+
659+ wtxn. commit ( ) . unwrap ( ) ;
660+ }
569661}
0 commit comments