Skip to content

Commit fff5ccc

Browse files
committed
Add a DecoderOptions struct to hold the decoder's options.
That way, later extensions to the decoder can be added to this struct without breaking the API.
1 parent 8ac855c commit fff5ccc

File tree

1 file changed

+35
-37
lines changed

1 file changed

+35
-37
lines changed

arrow/src/json/reader.rs

Lines changed: 35 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -564,7 +564,7 @@ where
564564
/// BufReader::new(File::open("test/data/mixed_arrays.json").unwrap());
565565
/// let inferred_schema = infer_json_schema(&mut reader, None).unwrap();
566566
/// let batch_size = 1024;
567-
/// let decoder = Decoder::new(Arc::new(inferred_schema), batch_size, None, None);
567+
/// let decoder = Decoder::new(Arc::new(inferred_schema), batch_size, Default::default());
568568
///
569569
/// // seek back to start so that the original file is usable again
570570
/// reader.seek(SeekFrom::Start(0)).unwrap();
@@ -578,34 +578,34 @@ pub struct Decoder {
578578
/// Explicit schema for the JSON file
579579
schema: SchemaRef,
580580
/// Optional projection for which columns to load (case-sensitive names)
581-
projection: Option<Vec<String>>,
582-
/// Batch size (number of records to load each time)
583581
batch_size: usize,
582+
/// Decoder options (column projection and per-column format strings)
583+
doptions: DecoderOptions,
584+
}
585+
586+
#[derive(Default, Debug)]
587+
pub struct DecoderOptions {
588+
/// Optional projection for which columns to load (case-sensitive names)
589+
projection: Option<Vec<String>>,
584590
/// optional HashMap of column names to its format strings
585591
format_strings: Option<HashMap<String, String>>,
586592
}
587593

588594
impl Decoder {
589595
/// Create a new JSON decoder from any value that implements the `Iterator<Item=Result<Value>>`
590596
/// trait.
591-
pub fn new(
592-
schema: SchemaRef,
593-
batch_size: usize,
594-
projection: Option<Vec<String>>,
595-
format_strings: Option<HashMap<String, String>>,
596-
) -> Self {
597+
pub fn new(schema: SchemaRef, batch_size: usize, doptions: DecoderOptions) -> Self {
597598
Self {
598599
schema,
599-
projection,
600600
batch_size,
601-
format_strings,
601+
doptions,
602602
}
603603
}
604604

605605
/// Returns the schema of the reader, useful for getting the schema without reading
606606
/// record batches
607607
pub fn schema(&self) -> SchemaRef {
608-
match &self.projection {
608+
match &self.doptions.projection {
609609
Some(projection) => {
610610
let fields = self.schema.fields();
611611
let projected_fields: Vec<Field> = fields
@@ -650,7 +650,7 @@ impl Decoder {
650650
}
651651

652652
let rows = &rows[..];
653-
let projection = self.projection.clone().unwrap_or_default();
653+
let projection = self.doptions.projection.clone().unwrap_or_default();
654654
let arrays = self.build_struct_array(rows, self.schema.fields(), &projection);
655655

656656
let projected_fields: Vec<Field> = if projection.is_empty() {
@@ -928,6 +928,7 @@ impl Decoder {
928928
T::Native: num::NumCast,
929929
{
930930
let format_string = self
931+
.doptions
931932
.format_strings
932933
.as_ref()
933934
.and_then(|fmts| fmts.get(col_name));
@@ -1553,16 +1554,9 @@ impl<R: Read> Reader<R> {
15531554
reader: R,
15541555
schema: SchemaRef,
15551556
batch_size: usize,
1556-
projection: Option<Vec<String>>,
1557-
format_strings: Option<HashMap<String, String>>,
1557+
doptions: DecoderOptions,
15581558
) -> Self {
1559-
Self::from_buf_reader(
1560-
BufReader::new(reader),
1561-
schema,
1562-
batch_size,
1563-
projection,
1564-
format_strings,
1565-
)
1559+
Self::from_buf_reader(BufReader::new(reader), schema, batch_size, doptions)
15661560
}
15671561

15681562
/// Create a new JSON Reader from a `BufReader<R: Read>`
@@ -1572,12 +1566,11 @@ impl<R: Read> Reader<R> {
15721566
reader: BufReader<R>,
15731567
schema: SchemaRef,
15741568
batch_size: usize,
1575-
projection: Option<Vec<String>>,
1576-
format_strings: Option<HashMap<String, String>>,
1569+
doptions: DecoderOptions,
15771570
) -> Self {
15781571
Self {
15791572
reader,
1580-
decoder: Decoder::new(schema, batch_size, projection, format_strings),
1573+
decoder: Decoder::new(schema, batch_size, doptions),
15811574
}
15821575
}
15831576

@@ -1712,8 +1705,10 @@ impl ReaderBuilder {
17121705
buf_reader,
17131706
schema,
17141707
self.batch_size,
1715-
self.projection,
1716-
self.format_strings,
1708+
DecoderOptions {
1709+
projection: self.projection,
1710+
format_strings: self.format_strings,
1711+
},
17171712
))
17181713
}
17191714
}
@@ -1868,8 +1863,7 @@ mod tests {
18681863
File::open("test/data/basic.json").unwrap(),
18691864
Arc::new(schema.clone()),
18701865
1024,
1871-
None,
1872-
None,
1866+
Default::default(),
18731867
);
18741868
let reader_schema = reader.schema();
18751869
assert_eq!(reader_schema, Arc::new(schema));
@@ -1920,8 +1914,10 @@ mod tests {
19201914
File::open("test/data/basic.json").unwrap(),
19211915
schema.clone(),
19221916
1024,
1923-
None,
1924-
Some(fmts),
1917+
DecoderOptions {
1918+
format_strings: Some(fmts),
1919+
..Default::default()
1920+
},
19251921
);
19261922
let reader_schema = reader.schema();
19271923
assert_eq!(reader_schema, schema);
@@ -1954,8 +1950,10 @@ mod tests {
19541950
File::open("test/data/basic.json").unwrap(),
19551951
Arc::new(schema),
19561952
1024,
1957-
Some(vec!["a".to_string(), "c".to_string()]),
1958-
None,
1953+
DecoderOptions {
1954+
projection: Some(vec!["a".to_string(), "c".to_string()]),
1955+
..Default::default()
1956+
},
19591957
);
19601958
let reader_schema = reader.schema();
19611959
let expected_schema = Arc::new(Schema::new(vec![
@@ -2123,7 +2121,7 @@ mod tests {
21232121

21242122
let reader = BufReader::new(GzDecoder::new(&file));
21252123
let mut reader =
2126-
Reader::from_buf_reader(reader, Arc::new(schema), 64, None, None);
2124+
Reader::from_buf_reader(reader, Arc::new(schema), 64, Default::default());
21272125
let batch_gz = reader.next().unwrap().unwrap();
21282126

21292127
for batch in vec![batch, batch_gz] {
@@ -3164,7 +3162,7 @@ mod tests {
31643162
true,
31653163
)]);
31663164

3167-
let decoder = Decoder::new(Arc::new(schema), 1024, None, None);
3165+
let decoder = Decoder::new(Arc::new(schema), 1024, Default::default());
31683166
let batch = decoder
31693167
.next_batch(
31703168
&mut vec![
@@ -3199,7 +3197,7 @@ mod tests {
31993197
true,
32003198
)]);
32013199

3202-
let decoder = Decoder::new(Arc::new(schema), 1024, None, None);
3200+
let decoder = Decoder::new(Arc::new(schema), 1024, Default::default());
32033201
let batch = decoder
32043202
.next_batch(
32053203
// NOTE: total struct element count needs to be greater than
@@ -3228,7 +3226,7 @@ mod tests {
32283226
#[test]
32293227
fn test_json_read_binary_structs() {
32303228
let schema = Schema::new(vec![Field::new("c1", DataType::Binary, true)]);
3231-
let decoder = Decoder::new(Arc::new(schema), 1024, None, None);
3229+
let decoder = Decoder::new(Arc::new(schema), 1024, Default::default());
32323230
let batch = decoder
32333231
.next_batch(
32343232
&mut vec![

0 commit comments

Comments
 (0)