Skip to content
This repository was archived by the owner on Apr 4, 2023. It is now read-only.

Commit c084f7f

Browse files
committed
Fix the facet string docids filterable deletion bug
1 parent 0d1f83b commit c084f7f

File tree

1 file changed

+112
-20
lines changed

1 file changed

+112
-20
lines changed

milli/src/update/delete_documents.rs

Lines changed: 112 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,15 @@ use std::collections::HashMap;
44
use chrono::Utc;
55
use fst::IntoStreamer;
66
use heed::types::ByteSlice;
7+
use heed::{BytesDecode, BytesEncode};
78
use roaring::RoaringBitmap;
89
use serde_json::Value;
910

1011
use super::ClearDocuments;
11-
use crate::error::{InternalError, UserError};
12-
use crate::heed_codec::facet::FacetStringLevelZeroValueCodec;
12+
use crate::error::{InternalError, SerializationError, UserError};
13+
use crate::heed_codec::facet::{
14+
FacetLevelValueU32Codec, FacetStringLevelZeroValueCodec, FacetStringZeroBoundsValueCodec,
15+
};
1316
use crate::heed_codec::CboRoaringBitmapCodec;
1417
use crate::index::{db_name, main_key};
1518
use crate::{DocumentId, ExternalDocumentsIds, FieldId, Index, Result, SmallString32, BEU32};
@@ -451,26 +454,61 @@ where
451454
Ok(())
452455
}
453456

454-
fn remove_docids_from_facet_field_id_string_docids<'a, C>(
457+
fn remove_docids_from_facet_field_id_string_docids<'a, C, D>(
455458
wtxn: &'a mut heed::RwTxn,
456-
db: &heed::Database<C, FacetStringLevelZeroValueCodec<CboRoaringBitmapCodec>>,
459+
db: &heed::Database<C, D>,
457460
to_remove: &RoaringBitmap,
458-
) -> heed::Result<()>
459-
where
460-
C: heed::BytesDecode<'a> + heed::BytesEncode<'a>,
461-
{
462-
let mut iter = db.remap_key_type::<ByteSlice>().iter_mut(wtxn)?;
461+
) -> crate::Result<()> {
462+
let db_name = Some(crate::index::db_name::FACET_ID_STRING_DOCIDS);
463+
let mut iter = db.remap_types::<ByteSlice, ByteSlice>().iter_mut(wtxn)?;
463464
while let Some(result) = iter.next() {
464-
let (bytes, (original_value, mut docids)) = result?;
465-
let previous_len = docids.len();
466-
docids -= to_remove;
467-
if docids.is_empty() {
468-
// safety: we don't keep references from inside the LMDB database.
469-
unsafe { iter.del_current()? };
470-
} else if docids.len() != previous_len {
471-
let bytes = bytes.to_owned();
472-
// safety: we don't keep references from inside the LMDB database.
473-
unsafe { iter.put_current(&bytes, &(original_value, docids))? };
465+
let (key, val) = result?;
466+
match FacetLevelValueU32Codec::bytes_decode(key) {
467+
Some(_) => {
468+
// If we are able to parse this key it means it is a facet string group
469+
// level key. We must then parse the value using the appropriate codec.
470+
let (group, mut docids) =
471+
FacetStringZeroBoundsValueCodec::<CboRoaringBitmapCodec>::bytes_decode(val)
472+
.ok_or_else(|| SerializationError::Decoding { db_name })?;
473+
474+
let previous_len = docids.len();
475+
docids -= to_remove;
476+
if docids.is_empty() {
477+
// safety: we don't keep references from inside the LMDB database.
478+
unsafe { iter.del_current()? };
479+
} else if docids.len() != previous_len {
480+
let key = key.to_owned();
481+
let val = &(group, docids);
482+
let value_bytes =
483+
FacetStringZeroBoundsValueCodec::<CboRoaringBitmapCodec>::bytes_encode(val)
484+
.ok_or_else(|| SerializationError::Encoding { db_name })?;
485+
486+
// safety: we don't keep references from inside the LMDB database.
487+
unsafe { iter.put_current(&key, &value_bytes)? };
488+
}
489+
}
490+
None => {
491+
// The key corresponds to a level zero facet string.
492+
let (original_value, mut docids) =
493+
FacetStringLevelZeroValueCodec::<CboRoaringBitmapCodec>::bytes_decode(val)
494+
.ok_or_else(|| SerializationError::Decoding { db_name })?;
495+
496+
let previous_len = docids.len();
497+
docids -= to_remove;
498+
if docids.is_empty() {
499+
// safety: we don't keep references from inside the LMDB database.
500+
unsafe { iter.del_current()? };
501+
} else if docids.len() != previous_len {
502+
let key = key.to_owned();
503+
let val = &(original_value, docids);
504+
let value_bytes =
505+
FacetStringLevelZeroValueCodec::<CboRoaringBitmapCodec>::bytes_encode(val)
506+
.ok_or_else(|| SerializationError::Encoding { db_name })?;
507+
508+
// safety: we don't keep references from inside the LMDB database.
509+
unsafe { iter.put_current(&key, &value_bytes)? };
510+
}
511+
}
474512
}
475513
}
476514

@@ -505,10 +543,13 @@ where
505543

506544
#[cfg(test)]
507545
mod tests {
546+
use big_s::S;
508547
use heed::EnvOpenOptions;
548+
use maplit::hashset;
509549

510550
use super::*;
511-
use crate::update::{IndexDocuments, UpdateFormat};
551+
use crate::update::{IndexDocuments, Settings, UpdateFormat};
552+
use crate::FilterCondition;
512553

513554
#[test]
514555
fn delete_documents_with_numbers_as_primary_key() {
@@ -566,4 +607,55 @@ mod tests {
566607

567608
wtxn.commit().unwrap();
568609
}
610+
611+
#[test]
612+
fn delete_documents_with_filterable_attributes() {
613+
let path = tempfile::tempdir().unwrap();
614+
let mut options = EnvOpenOptions::new();
615+
options.map_size(10 * 1024 * 1024); // 10 MB
616+
let index = Index::new(options, &path).unwrap();
617+
618+
let mut wtxn = index.write_txn().unwrap();
619+
let mut builder = Settings::new(&mut wtxn, &index, 0);
620+
builder.set_primary_key(S("docid"));
621+
builder.set_filterable_fields(hashset! { S("label") });
622+
builder.execute(|_, _| ()).unwrap();
623+
624+
let content = &br#"[
625+
{"docid":"1_4","label":"sign"},
626+
{"docid":"1_5","label":"letter"},
627+
{"docid":"1_7","label":"abstract,cartoon,design,pattern"},
628+
{"docid":"1_36","label":"drawing,painting,pattern"},
629+
{"docid":"1_37","label":"art,drawing,outdoor"},
630+
{"docid":"1_38","label":"aquarium,art,drawing"},
631+
{"docid":"1_39","label":"abstract"},
632+
{"docid":"1_40","label":"cartoon"},
633+
{"docid":"1_41","label":"art,drawing"},
634+
{"docid":"1_42","label":"art,pattern"},
635+
{"docid":"1_43","label":"abstract,art,drawing,pattern"},
636+
{"docid":"1_44","label":"drawing"},
637+
{"docid":"1_45","label":"art"},
638+
{"docid":"1_46","label":"abstract,colorfulness,pattern"},
639+
{"docid":"1_47","label":"abstract,pattern"},
640+
{"docid":"1_52","label":"abstract,cartoon"},
641+
{"docid":"1_57","label":"abstract,drawing,pattern"},
642+
{"docid":"1_58","label":"abstract,art,cartoon"},
643+
{"docid":"1_68","label":"design"},
644+
{"docid":"1_69","label":"geometry"}
645+
]"#[..];
646+
let mut builder = IndexDocuments::new(&mut wtxn, &index, 0);
647+
builder.update_format(UpdateFormat::Json);
648+
builder.execute(content, |_, _| ()).unwrap();
649+
650+
// Delete not all of the documents but some of them.
651+
let mut builder = DeleteDocuments::new(&mut wtxn, &index, 1).unwrap();
652+
builder.delete_external_id("1_4");
653+
builder.execute().unwrap();
654+
655+
let filter = FilterCondition::from_str(&wtxn, &index, "label = sign").unwrap();
656+
let results = index.search(&wtxn).filter(filter).execute().unwrap();
657+
assert!(results.documents_ids.is_empty());
658+
659+
wtxn.commit().unwrap();
660+
}
569661
}

0 commit comments

Comments
 (0)