Skip to content
This repository was archived by the owner on Apr 4, 2023. It is now read-only.

Commit 642008e

Browse files
committed
document reader transform (WIP)
1 parent 1edeeb5 commit 642008e

File tree

3 files changed

+367
-249
lines changed

3 files changed

+367
-249
lines changed

milli/src/documents/mod.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
/// The documents module defines an intermediary document format that milli uses for indexing, and
2+
/// provides an API to easily build and read such documents.
3+
///
4+
/// The `DocumentBuilder` interface allows writing batches of documents to a writer, which can
5+
/// later be read by milli using the `DocumentsReader` interface.
6+
17
mod reader;
28
mod builder;
39
mod serde;

milli/src/update/index_documents/mod.rs

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use std::borrow::Cow;
22
use std::collections::HashSet;
33
use std::fs::File;
4-
use std::io::{self, BufRead, BufReader, Seek, SeekFrom};
4+
use std::io::{self, BufRead, BufReader, Seek, SeekFrom, Read};
55
use std::num::{NonZeroU32, NonZeroUsize};
66
use std::result::Result as StdResult;
77
use std::str;
@@ -329,7 +329,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
329329

330330
pub fn execute<R, F>(self, reader: R, progress_callback: F) -> Result<DocumentAdditionResult>
331331
where
332-
R: io::Read,
332+
R: Read + Seek,
333333
F: Fn(UpdateIndexingStep, u64) + Sync,
334334
{
335335
let mut reader = BufReader::new(reader);
@@ -358,14 +358,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
358358
autogenerate_docids: self.autogenerate_docids,
359359
};
360360

361-
let output = match self.update_format {
362-
UpdateFormat::Csv => transform.output_from_csv(reader, &progress_callback)?,
363-
UpdateFormat::Json => transform.output_from_json(reader, &progress_callback)?,
364-
UpdateFormat::JsonStream => {
365-
transform.output_from_json_stream(reader, &progress_callback)?
366-
}
367-
};
368-
361+
let output = transform.read_documents(reader, progress_callback)?;
369362
let nb_documents = output.documents_count;
370363

371364
info!("Update transformed in {:.02?}", before_transform.elapsed());

0 commit comments

Comments
 (0)