@@ -10,7 +10,7 @@ use itertools::Itertools;
 use log::info;
 use roaring::RoaringBitmap;
 use serde_json::{Map, Value};
-use smallvec::SmallVec;
+use vec_utils::VecExt;
 
 use super::helpers::{
     create_sorter, create_writer, keep_latest_obkv, merge_obkvs, merge_two_obkvs, MergeFn,
@@ -128,28 +128,29 @@ impl Transform<'_, '_> {
 
         let mut obkv_buffer = Vec::new();
         let mut documents_count = 0;
+        let mut external_id_buffer = Vec::new();
+        let mut field_buffer: Vec<(u16, &[u8])> = Vec::new();
         while let Some((addition_index, document)) = reader.next_document_with_index()? {
+            let mut field_buffer_cache = field_buffer.drop_and_reuse();
             if self.log_every_n.map_or(false, |len| documents_count % len == 0) {
                 progress_callback(UpdateIndexingStep::RemapDocumentAddition {
                     documents_seen: documents_count,
                 });
             }
 
-            let mut external_id_buffer = SmallVec::<[u8; 512]>::new();
-            let mut field_buffer = SmallVec::<[(u16, &[u8]); 128]>::new();
-            let mut uuid_buffer = [0; uuid::adapter::Hyphenated::LENGTH];
 
             for (k, v) in document.iter() {
                 let mapped_id = *mapping.get(&k).unwrap();
-                field_buffer.push((mapped_id, v));
+                field_buffer_cache.push((mapped_id, v));
             }
 
             // We need to make sure that every document has a primary key. After we have remapped
             // all the fields in the document, we try to find the primary key value. If we can find
             // it, transform it into a string and validate it, and then update it in the
             // document. If none is found, and we were told to generate missing document ids, then
             // we create the missing field, and update the new document.
-            let external_id = match field_buffer.iter_mut().find(|(id, _)| *id == primary_key_id) {
+            let mut uuid_buffer = [0; uuid::adapter::Hyphenated::LENGTH];
+            let external_id = match field_buffer_cache.iter_mut().find(|(id, _)| *id == primary_key_id) {
                 Some((_, bytes)) => {
                     let value = match serde_json::from_slice(bytes).unwrap() {
                         Value::String(string) => match validate_document_id(&string) {
@@ -191,19 +192,19 @@ impl Transform<'_, '_> {
 
                     let uuid = uuid::Uuid::new_v4().to_hyphenated().encode_lower(&mut uuid_buffer);
                     serde_json::to_writer(&mut external_id_buffer, &uuid).unwrap();
-                    field_buffer.push((primary_key_id, &external_id_buffer));
+                    field_buffer_cache.push((primary_key_id, &external_id_buffer));
                     Cow::Borrowed(&*uuid)
                 }
             };
 
             // Insertion in a obkv need to be done with keys ordered. For now they are ordered
             // according to the document addition key order, so we sort it according to the
             // fieldids map keys order.
-            field_buffer.sort_unstable_by(|(f1, _), (f2, _)| f1.cmp(&f2));
+            field_buffer_cache.sort_unstable_by(|(f1, _), (f2, _)| f1.cmp(&f2));
 
             // The last step is to build the new obkv document, and insert it in the sorter.
             let mut writer = obkv::KvWriter::new(&mut obkv_buffer);
-            for (k, v) in field_buffer.iter() {
+            for (k, v) in field_buffer_cache.iter() {
                 writer.insert(*k, v)?;
             }
 
@@ -216,6 +217,8 @@ impl Transform<'_, '_> {
             });
 
             obkv_buffer.clear();
+            field_buffer = field_buffer_cache.drop_and_reuse();
+            external_id_buffer.clear();
         }
 
         progress_callback(UpdateIndexingStep::RemapDocumentAddition {
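The patch replaces the fixed-size `SmallVec` buffers that were recreated for every document with two `Vec`s declared outside the loop, so their allocations are kept between documents. Because `field_buffer` stores slices borrowed from the current document, its lifetime parameter changes on every iteration, which is why a plain `clear()` is not enough and a `drop_and_reuse()` conversion is used instead. As a rough illustration of the idea (the real `vec_utils::VecExt` helper is internal to this codebase, so the exact signature below is an assumption):

```rust
use std::mem::{align_of, size_of, ManuallyDrop};

/// Hypothetical sketch of a `drop_and_reuse` extension: empty the vector and
/// hand its allocation back as a `Vec` of another element type (typically the
/// same type with a different lifetime), so the capacity is recycled.
pub trait VecExt<T> {
    fn drop_and_reuse<U>(self) -> Vec<U>;
}

impl<T> VecExt<T> for Vec<T> {
    fn drop_and_reuse<U>(mut self) -> Vec<U> {
        // Only reuse the allocation when the layouts are compatible.
        if size_of::<T>() != size_of::<U>() || align_of::<T>() != align_of::<U>() {
            return Vec::new();
        }
        self.clear(); // drop the old elements but keep the capacity
        let mut this = ManuallyDrop::new(self);
        let (ptr, cap) = (this.as_mut_ptr(), this.capacity());
        // SAFETY: the vector is empty, the pointer and capacity come from a
        // live `Vec<T>`, and `T` and `U` have identical size and alignment.
        unsafe { Vec::from_raw_parts(ptr.cast::<U>(), 0, cap) }
    }
}
```

Under that assumption, `field_buffer.drop_and_reuse()` at the top of each iteration yields a `field_buffer_cache` that may borrow from the current document, and the second `drop_and_reuse()` at the bottom converts it back once those borrows end, so the buffer grows once instead of being reallocated per document.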