Skip to content
This repository was archived by the owner on Apr 4, 2023. It is now read-only.

Commit a99dc21

Browse files
committed
remove smallvec in transform loop
1 parent 46b80b8 commit a99dc21

File tree

3 files changed

+20
-9
lines changed

3 files changed

+20
-9
lines changed

Cargo.lock

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

milli/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@ smallstr = { version = "0.2.0", features = ["serde"] }
3535
smallvec = { version = "1.6.1", features = ["write"] }
3636
tempfile = "3.2.0"
3737
uuid = { version = "0.8.2", features = ["v4"] }
38+
vec-utils = "0.3.0"
3839

3940
# facet filter parser
4041
pest = { git = "https://github.com/pest-parser/pest.git", rev = "51fd1d49f1041f7839975664ef71fe15c7dcaf67" }

milli/src/update/index_documents/transform.rs

Lines changed: 12 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ use itertools::Itertools;
1010
use log::info;
1111
use roaring::RoaringBitmap;
1212
use serde_json::{Map, Value};
13-
use smallvec::SmallVec;
13+
use vec_utils::VecExt;
1414

1515
use super::helpers::{
1616
create_sorter, create_writer, keep_latest_obkv, merge_obkvs, merge_two_obkvs, MergeFn,
@@ -128,28 +128,29 @@ impl Transform<'_, '_> {
128128

129129
let mut obkv_buffer = Vec::new();
130130
let mut documents_count = 0;
131+
let mut external_id_buffer = Vec::new();
132+
let mut field_buffer: Vec<(u16, &[u8])> = Vec::new();
131133
while let Some((addition_index, document)) = reader.next_document_with_index()? {
134+
let mut field_buffer_cache = field_buffer.drop_and_reuse();
132135
if self.log_every_n.map_or(false, |len| documents_count % len == 0) {
133136
progress_callback(UpdateIndexingStep::RemapDocumentAddition {
134137
documents_seen: documents_count,
135138
});
136139
}
137140

138-
let mut external_id_buffer = SmallVec::<[u8; 512]>::new();
139-
let mut field_buffer = SmallVec::<[(u16, &[u8]); 128]>::new();
140-
let mut uuid_buffer = [0; uuid::adapter::Hyphenated::LENGTH];
141141

142142
for (k, v) in document.iter() {
143143
let mapped_id = *mapping.get(&k).unwrap();
144-
field_buffer.push((mapped_id, v));
144+
field_buffer_cache.push((mapped_id, v));
145145
}
146146

147147
// We need to make sure that every document has a primary key. After we have remapped
148148
// all the fields in the document, we try to find the primary key value. If we can find
149149
// it, transform it into a string and validate it, and then update it in the
150150
// document. If none is found, and we were told to generate missing document ids, then
151151
// we create the missing field, and update the new document.
152-
let external_id = match field_buffer.iter_mut().find(|(id, _)| *id == primary_key_id) {
152+
let mut uuid_buffer = [0; uuid::adapter::Hyphenated::LENGTH];
153+
let external_id = match field_buffer_cache.iter_mut().find(|(id, _)| *id == primary_key_id) {
153154
Some((_, bytes)) => {
154155
let value = match serde_json::from_slice(bytes).unwrap() {
155156
Value::String(string) => match validate_document_id(&string) {
@@ -191,19 +192,19 @@ impl Transform<'_, '_> {
191192

192193
let uuid = uuid::Uuid::new_v4().to_hyphenated().encode_lower(&mut uuid_buffer);
193194
serde_json::to_writer(&mut external_id_buffer, &uuid).unwrap();
194-
field_buffer.push((primary_key_id, &external_id_buffer));
195+
field_buffer_cache.push((primary_key_id, &external_id_buffer));
195196
Cow::Borrowed(&*uuid)
196197
}
197198
};
198199

199200
// Insertion in an obkv needs to be done with keys ordered. For now they are ordered
200201
// according to the document addition key order, so we sort it according to the
201202
// fieldids map keys order.
202-
field_buffer.sort_unstable_by(|(f1, _), (f2, _)| f1.cmp(&f2));
203+
field_buffer_cache.sort_unstable_by(|(f1, _), (f2, _)| f1.cmp(&f2));
203204

204205
// The last step is to build the new obkv document, and insert it in the sorter.
205206
let mut writer = obkv::KvWriter::new(&mut obkv_buffer);
206-
for (k, v) in field_buffer.iter() {
207+
for (k, v) in field_buffer_cache.iter() {
207208
writer.insert(*k, v)?;
208209
}
209210

@@ -216,6 +217,8 @@ impl Transform<'_, '_> {
216217
});
217218

218219
obkv_buffer.clear();
220+
field_buffer = field_buffer_cache.drop_and_reuse();
221+
external_id_buffer.clear();
219222
}
220223

221224
progress_callback(UpdateIndexingStep::RemapDocumentAddition {

0 commit comments

Comments
 (0)