This repository was archived by the owner on Apr 4, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 83
This repository was archived by the owner on Apr 4, 2023. It is now read-only.
Separate the original facet string values from the docids #327
Copy link
Copy link
Closed
Labels
enhancement: New feature or request
Description
I found out that storing the original facet strings in front of the document ids associated with a normalized facet string in the facet_id_string_docids database wasn't a very good idea when it comes to deleting entries.
As you can see below, it would be much better to move the original facet strings into a separate database:
milli/milli/src/update/delete_documents.rs
Lines 457 to 516 in c51bb67
| /// Removes the document ids in `to_remove` from every entry of the facet id string docids | |
| /// database `db`. | |
| /// | |
| /// The database mixes two kinds of entries, told apart by whether the key decodes with | |
| /// `FacetLevelValueU32Codec`: group level entries, whose value is a `(group, docids)` pair, | |
| /// and level zero entries, whose value is an `(original_value, docids)` pair. Both kinds are | |
| /// handled the same way: an entry whose docids become empty is deleted, an entry whose docids | |
| /// shrank is rewritten in place, and an entry that lost nothing is left as it is. | |
| /// | |
| /// # Errors | |
| /// | |
| /// Propagates errors from the iterator and from the in-place delete/put operations, and | |
| /// returns a `SerializationError` carrying this database's name when a value cannot be | |
| /// decoded or re-encoded. | |
| fn remove_docids_from_facet_field_id_string_docids<'a, C, D>( | |
| wtxn: &'a mut heed::RwTxn, | |
| db: &heed::Database<C, D>, | |
| to_remove: &RoaringBitmap, | |
| ) -> crate::Result<()> { | |
| // Only used to report which database a decoding/encoding failure comes from. | |
| let db_name = Some(crate::index::db_name::FACET_ID_STRING_DOCIDS); | |
| // Iterate over raw bytes: the codec to apply depends on the kind of key found. | |
| let mut iter = db.remap_types::<ByteSlice, ByteSlice>().iter_mut(wtxn)?; | |
| while let Some(result) = iter.next() { | |
| let (key, val) = result?; | |
| match FacetLevelValueU32Codec::bytes_decode(key) { | |
| Some(_) => { | |
| // If we are able to parse this key it means it is a facet string group | |
| // level key. We must then parse the value using the appropriate codec. | |
| let (group, mut docids) = | |
| FacetStringZeroBoundsValueCodec::<CboRoaringBitmapCodec>::bytes_decode(val) | |
| .ok_or_else(|| SerializationError::Decoding { db_name })?; | |
| // Removing ids can only shrink the bitmap, so comparing lengths before and | |
| // after is enough to know whether this entry was affected. | |
| let previous_len = docids.len(); | |
| docids -= to_remove; | |
| if docids.is_empty() { | |
| // safety: we don't keep references from inside the LMDB database. | |
| unsafe { iter.del_current()? }; | |
| } else if docids.len() != previous_len { | |
| // The key is copied into an owned buffer before writing. | |
| // NOTE(review): presumably so that no slice borrowed from the database | |
| // is handed to `put_current` - confirm. | |
| let key = key.to_owned(); | |
| let val = &(group, docids); | |
| let value_bytes = | |
| FacetStringZeroBoundsValueCodec::<CboRoaringBitmapCodec>::bytes_encode(val) | |
| .ok_or_else(|| SerializationError::Encoding { db_name })?; | |
| // safety: we don't keep references from inside the LMDB database. | |
| unsafe { iter.put_current(&key, &value_bytes)? }; | |
| } | |
| } | |
| None => { | |
| // The key corresponds to a level zero facet string: the value holds the | |
| // original facet string in front of the document ids. | |
| let (original_value, mut docids) = | |
| FacetStringLevelZeroValueCodec::<CboRoaringBitmapCodec>::bytes_decode(val) | |
| .ok_or_else(|| SerializationError::Decoding { db_name })?; | |
| // Same length-based change detection as for the group level entries. | |
| let previous_len = docids.len(); | |
| docids -= to_remove; | |
| if docids.is_empty() { | |
| // safety: we don't keep references from inside the LMDB database. | |
| unsafe { iter.del_current()? }; | |
| } else if docids.len() != previous_len { | |
| let key = key.to_owned(); | |
| // The original value is written back unchanged next to the reduced docids. | |
| let val = &(original_value, docids); | |
| let value_bytes = | |
| FacetStringLevelZeroValueCodec::<CboRoaringBitmapCodec>::bytes_encode(val) | |
| .ok_or_else(|| SerializationError::Encoding { db_name })?; | |
| // safety: we don't keep references from inside the LMDB database. | |
| unsafe { iter.put_current(&key, &value_bytes)? }; | |
| } | |
| } | |
| } | |
| } | |
| Ok(()) | |
| } |
The amount of code needed to delete the document ids from this database would be much smaller. We would only need a small additional piece of code to delete the original values from the separate database too!
milli/milli/src/update/delete_documents.rs
Lines 429 to 455 in c51bb67
| /// Deletes, for the field `field_id`, every entry of `db` whose key maps to a document id | |
| /// contained in `to_remove`. | |
| /// | |
| /// Only the entries whose key starts with the big-endian bytes of `field_id` are visited. | |
| /// Each visited key is decoded with the codec `C` and turned into a `DocumentId` by | |
| /// `convert`; the value of the entry is ignored. | |
| /// | |
| /// # Errors | |
| /// | |
| /// Propagates any `heed` error raised while opening the iterator, advancing it or deleting | |
| /// the current entry. | |
| fn remove_docids_from_field_id_docid_facet_value<'a, C, K, F, DC, V>( | |
| wtxn: &'a mut heed::RwTxn, | |
| db: &heed::Database<C, DC>, | |
| field_id: FieldId, | |
| to_remove: &RoaringBitmap, | |
| convert: F, | |
| ) -> heed::Result<()> | |
| where | |
| C: heed::BytesDecode<'a, DItem = K>, | |
| DC: heed::BytesDecode<'a, DItem = V>, | |
| F: Fn(K) -> DocumentId, | |
| { | |
| // The prefix is given as raw bytes, so the key type is remapped to `ByteSlice` to open the | |
| // iterator and then back to `C` so that the keys it yields are decoded. | |
| let mut iter = db | |
| .remap_key_type::<ByteSlice>() | |
| .prefix_iter_mut(wtxn, &field_id.to_be_bytes())? | |
| .remap_key_type::<C>(); | |
| while let Some(result) = iter.next() { | |
| let (key, _) = result?; | |
| // `convert` extracts the document id this entry belongs to from the decoded key. | |
| if to_remove.contains(convert(key)) { | |
| // safety: we don't keep references from inside the LMDB database. | |
| unsafe { iter.del_current()? }; | |
| } | |
| } | |
| Ok(()) | |
| } |
Metadata
Metadata
Assignees
Labels
enhancement: New feature or request