Skip to content
This repository was archived by the owner on Apr 4, 2023. It is now read-only.

Commit b3d61e0

Browse files
committed
review edits
1 parent 6de1b41 commit b3d61e0

File tree

5 files changed

+53
-126
lines changed

5 files changed

+53
-126
lines changed

Cargo.lock

Lines changed: 9 additions & 103 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

milli/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ smallstr = { version = "0.2.0", features = ["serde"] }
3535
smallvec = { version = "1.6.1", features = ["write"] }
3636
tempfile = "3.2.0"
3737
uuid = { version = "0.8.2", features = ["v4"] }
38-
vec-utils = "0.3.0"
3938

4039
# facet filter parser
4140
pest = { git = "https://github.com/pest-parser/pest.git", rev = "51fd1d49f1041f7839975664ef71fe15c7dcaf67" }

milli/src/documents/serde.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -236,11 +236,11 @@ impl<'a, W: io::Write> SerializeMap for MapSerializer<'a, W> {
236236
type Error = Error;
237237

238238
fn serialize_key<T: ?Sized + Serialize>(&mut self, _key: &T) -> Result<(), Self::Error> {
239-
unimplemented!()
239+
unreachable!()
240240
}
241241

242242
fn serialize_value<T: ?Sized>(&mut self, _value: &T) -> Result<(), Self::Error> {
243-
unimplemented!()
243+
unreachable!()
244244
}
245245

246246
fn end(mut self) -> Result<Self::Ok, Self::Error> {
@@ -262,8 +262,8 @@ impl<'a, W: io::Write> SerializeMap for MapSerializer<'a, W> {
262262
K: Serialize,
263263
V: Serialize,
264264
{
265-
let field_serilizer = FieldSerializer { index: &mut self.index };
266-
let field_id: FieldId = key.serialize(field_serilizer)?;
265+
let field_serializer = FieldSerializer { index: &mut self.index };
266+
let field_id: FieldId = key.serialize(field_serializer)?;
267267

268268
self.buffer.clear();
269269
let mut cursor = io::Cursor::new(&mut self.buffer);
@@ -294,7 +294,7 @@ impl<'a> serde::Serializer for FieldSerializer<'a> {
294294

295295
fn serialize_str(self, ws: &str) -> Result<Self::Ok, Self::Error> {
296296
let field_id = match self.index.get_by_right(ws) {
297-
Some(field) => *field,
297+
Some(field_id) => *field_id,
298298
None => {
299299
let field_id = self.index.len() as FieldId;
300300
self.index.insert(field_id, ws.to_string());

milli/src/update/index_documents/transform.rs

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ use itertools::Itertools;
1010
use log::info;
1111
use roaring::RoaringBitmap;
1212
use serde_json::{Map, Value};
13-
use vec_utils::VecExt;
1413

1514
use super::helpers::{
1615
create_sorter, create_writer, keep_latest_obkv, merge_obkvs, merge_two_obkvs, MergeFn,
@@ -131,7 +130,7 @@ impl Transform<'_, '_> {
131130
let mut external_id_buffer = Vec::new();
132131
let mut field_buffer: Vec<(u16, &[u8])> = Vec::new();
133132
while let Some((addition_index, document)) = reader.next_document_with_index()? {
134-
let mut field_buffer_cache = field_buffer.drop_and_reuse();
133+
let mut field_buffer_cache = drop_and_reuse(field_buffer);
135134
if self.log_every_n.map_or(false, |len| documents_count % len == 0) {
136135
progress_callback(UpdateIndexingStep::RemapDocumentAddition {
137136
documents_seen: documents_count,
@@ -217,7 +216,7 @@ impl Transform<'_, '_> {
217216
});
218217

219218
obkv_buffer.clear();
220-
field_buffer = field_buffer_cache.drop_and_reuse();
219+
field_buffer = drop_and_reuse(field_buffer_cache);
221220
external_id_buffer.clear();
222221
}
223222

@@ -482,6 +481,20 @@ fn validate_document_id(document_id: &str) -> Option<&str> {
482481
})
483482
}
484483

484+
/// This function drops and reuses a Vec, transmuting its contained type along the way.
485+
///
486+
/// This is useful when you want to change the lifetime associated with a vec of references, while
487+
/// keeping the allocation.
488+
///
489+
/// The trick here is that the call to collect will reuse the vec allocation.
490+
fn drop_and_reuse<U, T>(mut vec: Vec<U>) -> Vec<T> {
491+
debug_assert_eq!(std::mem::align_of::<U>(), std::mem::align_of::<T>());
492+
debug_assert_eq!(std::mem::size_of::<U>(), std::mem::size_of::<T>());
493+
vec.clear();
494+
debug_assert!(vec.is_empty());
495+
vec.into_iter().map(|_| unreachable!()).collect()
496+
}
497+
485498
#[cfg(test)]
486499
mod test {
487500
use super::*;

milli/tests/search/query_criteria.rs

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
use std::cmp::Reverse;
2+
use std::io::Cursor;
23

34
use big_s::S;
45
use heed::EnvOpenOptions;
56
use itertools::Itertools;
67
use maplit::hashset;
7-
use milli::update::{Settings, UpdateBuilder, UpdateFormat};
8+
use milli::documents::{DocumentBatchBuilder, DocumentBatchReader};
9+
use milli::update::{Settings, UpdateBuilder};
810
use milli::{AscDesc, Criterion, Index, Search, SearchResult};
911
use rand::Rng;
1012
use Criterion::*;
@@ -386,13 +388,13 @@ fn criteria_ascdesc() {
386388
let mut builder = UpdateBuilder::new(0);
387389
builder.max_memory(10 * 1024 * 1024); // 10MiB
388390
let mut builder = builder.index_documents(&mut wtxn, &index);
389-
builder.update_format(UpdateFormat::Csv);
390391
builder.enable_autogenerate_docids();
391392

392-
let content = [
393-
vec![S("name,age")],
394-
(0..ASC_DESC_CANDIDATES_THRESHOLD + 1)
395-
.map(|_| {
393+
let mut cursor = Cursor::new(Vec::new());
394+
let mut batch_builder = DocumentBatchBuilder::new(&mut cursor).unwrap();
395+
396+
(0..ASC_DESC_CANDIDATES_THRESHOLD + 1)
397+
.for_each(|_| {
396398
let mut rng = rand::thread_rng();
397399

398400
let age = rng.gen::<u32>().to_string();
@@ -403,14 +405,21 @@ fn criteria_ascdesc() {
403405
.take(10)
404406
.collect::<String>();
405407

406-
format!("{},{}", name, age)
407-
})
408-
.collect::<Vec<_>>(),
409-
]
410-
.iter()
411-
.flatten()
412-
.join("\n");
413-
builder.execute(content.as_bytes(), |_, _| ()).unwrap();
408+
let json = serde_json::json!({
409+
"name": name,
410+
"age": age,
411+
});
412+
413+
batch_builder.add_documents(json).unwrap();
414+
});
415+
416+
batch_builder.finish().unwrap();
417+
418+
cursor.set_position(0);
419+
420+
let reader = DocumentBatchReader::from_reader(cursor).unwrap();
421+
422+
builder.execute(reader, |_, _| ()).unwrap();
414423

415424
wtxn.commit().unwrap();
416425

0 commit comments

Comments
 (0)