@@ -207,20 +207,27 @@ impl HnswIndexProvider {
207
207
208
208
let index_config = IndexConfig :: new ( dimensionality, distance_function) ;
209
209
210
- let storage_path_str = match new_storage_path. to_str ( ) {
211
- Some ( storage_path_str) => storage_path_str,
212
- None => {
213
- return Err ( Box :: new ( HnswIndexProviderForkError :: PathToStringError (
214
- new_storage_path,
215
- ) ) ) ;
216
- }
217
- } ;
210
+ // let storage_path_str = match new_storage_path.to_str() {
211
+ // Some(storage_path_str) => storage_path_str,
212
+ // None => {
213
+ // return Err(Box::new(HnswIndexProviderForkError::PathToStringError(
214
+ // new_storage_path,
215
+ // )));
216
+ // }
217
+ // };
218
218
219
219
// Check if the entry is in the cache, if it is, we assume
220
220
// another thread has loaded the index and we return it.
221
221
match self . get ( & new_id, cache_key) . await {
222
222
Some ( index) => Ok ( index. clone ( ) ) ,
223
- None => match HnswIndex :: load ( storage_path_str, & index_config, ef_search, new_id) {
223
+ None => match HnswIndex :: load_from_hnsw_data (
224
+ self . fetch_hnsw_segment ( & new_id, prefix_path)
225
+ . await
226
+ . map_err ( |e| Box :: new ( HnswIndexProviderForkError :: FileError ( * e) ) ) ?,
227
+ & index_config,
228
+ ef_search,
229
+ new_id,
230
+ ) {
224
231
Ok ( index) => {
225
232
let index = HnswIndexRef {
226
233
inner : Arc :: new ( RwLock :: new ( DistributedHnswInner {
@@ -277,10 +284,33 @@ impl HnswIndexProvider {
277
284
prefix_path : & str ,
278
285
) -> Result < ( ) , Box < HnswIndexProviderFileError > > {
279
286
// Fetch the files from storage and put them in the index storage path.
287
+ let hnsw_data = self . fetch_hnsw_segment ( source_id, prefix_path) . await ?;
288
+ let getters = [
289
+ |hnsw_data : & hnswlib:: HnswData | Arc :: new ( Vec :: from ( hnsw_data. header_buffer ( ) ) ) ,
290
+ |hnsw_data : & hnswlib:: HnswData | Arc :: new ( Vec :: from ( hnsw_data. data_level0_buffer ( ) ) ) ,
291
+ |hnsw_data : & hnswlib:: HnswData | Arc :: new ( Vec :: from ( hnsw_data. length_buffer ( ) ) ) ,
292
+ |hnsw_data : & hnswlib:: HnswData | Arc :: new ( Vec :: from ( hnsw_data. link_list_buffer ( ) ) ) ,
293
+ ] ;
294
+
295
+ for ( file, getter) in FILES . iter ( ) . zip ( getters) {
296
+ let file_path = index_storage_path. join ( file) ;
297
+ self . copy_bytes_to_local_file ( & file_path, getter ( & hnsw_data) )
298
+ . await ?;
299
+ }
300
+ Ok ( ( ) )
301
+ }
302
+
303
+ async fn fetch_hnsw_segment (
304
+ & self ,
305
+ source_id : & IndexUuid ,
306
+ prefix_path : & str ,
307
+ ) -> Result < hnswlib:: HnswData , Box < HnswIndexProviderFileError > > {
308
+ let mut buffers = Vec :: new ( ) ;
309
+
280
310
for file in FILES . iter ( ) {
281
311
let s3_fetch_span =
282
312
tracing:: trace_span!( parent: Span :: current( ) , "Read bytes from s3" , file = file) ;
283
- let buf = s3_fetch_span
313
+ let _ = s3_fetch_span
284
314
. in_scope ( || async {
285
315
let key = Self :: format_key ( prefix_path, source_id, file) ;
286
316
tracing:: info!( "Loading hnsw index file: {} into directory" , key) ;
@@ -304,13 +334,24 @@ impl HnswIndexProvider {
304
334
bytes_read,
305
335
key,
306
336
) ;
307
- Ok ( buf)
337
+ buffers. push ( buf) ;
338
+ Ok ( ( ) )
308
339
} )
309
340
. await ?;
310
- let file_path = index_storage_path. join ( file) ;
311
- self . copy_bytes_to_local_file ( & file_path, buf) . await ?;
312
341
}
313
- Ok ( ( ) )
342
+ match hnswlib:: HnswData :: new_from_buffers (
343
+ buffers[ 0 ] . clone ( ) ,
344
+ buffers[ 1 ] . clone ( ) ,
345
+ buffers[ 2 ] . clone ( ) ,
346
+ buffers[ 3 ] . clone ( ) ,
347
+ ) {
348
+ Ok ( hnsw_data) => Ok ( hnsw_data) ,
349
+ Err ( e) => Err ( Box :: new ( HnswIndexProviderFileError :: StorageError (
350
+ chroma_storage:: StorageError :: Message {
351
+ message : e. to_string ( ) ,
352
+ } ,
353
+ ) ) ) ,
354
+ }
314
355
}
315
356
316
357
pub async fn open (
@@ -356,20 +397,27 @@ impl HnswIndexProvider {
356
397
357
398
let index_config = IndexConfig :: new ( dimensionality, distance_function) ;
358
399
359
- let index_storage_path_str = match index_storage_path. to_str ( ) {
360
- Some ( index_storage_path_str) => index_storage_path_str,
361
- None => {
362
- return Err ( Box :: new ( HnswIndexProviderOpenError :: PathToStringError (
363
- index_storage_path,
364
- ) ) ) ;
365
- }
366
- } ;
400
+ // let index_storage_path_str = match index_storage_path.to_str() {
401
+ // Some(index_storage_path_str) => index_storage_path_str,
402
+ // None => {
403
+ // return Err(Box::new(HnswIndexProviderOpenError::PathToStringError(
404
+ // index_storage_path,
405
+ // )));
406
+ // }
407
+ // };
367
408
368
409
// Check if the entry is in the cache, if it is, we assume
369
410
// another thread has loaded the index and we return it.
370
411
let index = match self . get ( id, cache_key) . await {
371
412
Some ( index) => Ok ( index. clone ( ) ) ,
372
- None => match HnswIndex :: load ( index_storage_path_str, & index_config, ef_search, * id) {
413
+ None => match HnswIndex :: load_from_hnsw_data (
414
+ self . fetch_hnsw_segment ( id, prefix_path)
415
+ . await
416
+ . map_err ( |e| Box :: new ( HnswIndexProviderOpenError :: FileError ( * e) ) ) ?,
417
+ & index_config,
418
+ ef_search,
419
+ * id,
420
+ ) {
373
421
Ok ( index) => {
374
422
let index = HnswIndexRef {
375
423
inner : Arc :: new ( RwLock :: new ( DistributedHnswInner {
0 commit comments