@@ -56,11 +56,10 @@ use arrow::datatypes::{
5656use arrow:: util:: bit_util;
5757
5858use crate :: arrow:: converter:: {
59- BinaryArrayConverter , BinaryConverter , Converter , DecimalArrayConverter ,
60- DecimalConverter , FixedLenBinaryConverter , FixedSizeArrayConverter ,
61- Int96ArrayConverter , Int96Converter , IntervalDayTimeArrayConverter ,
62- IntervalDayTimeConverter , IntervalYearMonthArrayConverter ,
63- IntervalYearMonthConverter , Utf8ArrayConverter , Utf8Converter ,
59+ Converter , DecimalArrayConverter , DecimalConverter , FixedLenBinaryConverter ,
60+ FixedSizeArrayConverter , Int96ArrayConverter , Int96Converter ,
61+ IntervalDayTimeArrayConverter , IntervalDayTimeConverter ,
62+ IntervalYearMonthArrayConverter , IntervalYearMonthConverter ,
6463} ;
6564use crate :: arrow:: record_reader:: buffer:: { ScalarValue , ValuesBuffer } ;
6665use crate :: arrow:: record_reader:: { GenericRecordReader , RecordReader } ;
@@ -70,8 +69,8 @@ use crate::column::page::PageIterator;
7069use crate :: column:: reader:: decoder:: ColumnValueDecoder ;
7170use crate :: column:: reader:: ColumnReaderImpl ;
7271use crate :: data_type:: {
73- BoolType , ByteArrayType , DataType , DoubleType , FixedLenByteArrayType , FloatType ,
74- Int32Type , Int64Type , Int96Type ,
72+ BoolType , DataType , DoubleType , FixedLenByteArrayType , FloatType , Int32Type ,
73+ Int64Type , Int96Type ,
7574} ;
7675use crate :: errors:: { ParquetError , ParquetError :: ArrowError , Result } ;
7776use crate :: file:: reader:: { FilePageIterator , FileReader } ;
@@ -81,9 +80,15 @@ use crate::schema::types::{
8180use crate :: schema:: visitor:: TypeVisitor ;
8281
8382mod byte_array;
83+ mod byte_array_dictionary;
84+ mod dictionary_buffer;
8485mod offset_buffer;
8586
87+ #[ cfg( test) ]
88+ mod test_util;
89+
8690pub use byte_array:: make_byte_array_reader;
91+ pub use byte_array_dictionary:: make_byte_array_dictionary_reader;
8792
8893/// Array reader reads parquet data into arrow array.
8994pub trait ArrayReader {
@@ -271,7 +276,8 @@ where
271276 . clone ( ) ,
272277 } ;
273278
274- let record_reader = RecordReader :: < T > :: new_with_options ( column_desc. clone ( ) , null_mask_only) ;
279+ let record_reader =
280+ RecordReader :: < T > :: new_with_options ( column_desc. clone ( ) , null_mask_only) ;
275281
276282 Ok ( Self {
277283 data_type,
@@ -829,17 +835,18 @@ fn remove_indices(
829835 size
830836 ) ,
831837 ArrowType :: Struct ( fields) => {
832- let struct_array = arr. as_any ( )
838+ let struct_array = arr
839+ . as_any ( )
833840 . downcast_ref :: < StructArray > ( )
834841 . expect ( "Array should be a struct" ) ;
835842
836843 // Recursively call `remove_indices` on each of the struct's fields
837- let new_columns = fields. into_iter ( )
844+ let new_columns = fields
845+ . into_iter ( )
838846 . zip ( struct_array. columns ( ) )
839847 . map ( |( field, column) | {
840848 let dt = field. data_type ( ) . clone ( ) ;
841- Ok ( ( field,
842- remove_indices ( column. clone ( ) , dt, indices. clone ( ) ) ?) )
849+ Ok ( ( field, remove_indices ( column. clone ( ) , dt, indices. clone ( ) ) ?) )
843850 } )
844851 . collect :: < Result < Vec < _ > > > ( ) ?;
845852
@@ -1783,35 +1790,12 @@ impl<'a> ArrayReaderBuilder {
17831790 ) ?,
17841791 ) ) ,
17851792 PhysicalType :: BYTE_ARRAY => match arrow_type {
1786- // TODO: Replace with optimised dictionary reader (#171)
1787- Some ( ArrowType :: Dictionary ( _, _) ) => {
1788- match cur_type. get_basic_info ( ) . converted_type ( ) {
1789- ConvertedType :: UTF8 => {
1790- let converter = Utf8Converter :: new ( Utf8ArrayConverter { } ) ;
1791- Ok ( Box :: new ( ComplexObjectArrayReader :: <
1792- ByteArrayType ,
1793- Utf8Converter ,
1794- > :: new (
1795- page_iterator,
1796- column_desc,
1797- converter,
1798- arrow_type,
1799- ) ?) )
1800- }
1801- _ => {
1802- let converter = BinaryConverter :: new ( BinaryArrayConverter { } ) ;
1803- Ok ( Box :: new ( ComplexObjectArrayReader :: <
1804- ByteArrayType ,
1805- BinaryConverter ,
1806- > :: new (
1807- page_iterator,
1808- column_desc,
1809- converter,
1810- arrow_type,
1811- ) ?) )
1812- }
1813- }
1814- }
1793+ Some ( ArrowType :: Dictionary ( _, _) ) => make_byte_array_dictionary_reader (
1794+ page_iterator,
1795+ column_desc,
1796+ arrow_type,
1797+ null_mask_only,
1798+ ) ,
18151799 _ => make_byte_array_reader (
18161800 page_iterator,
18171801 column_desc,
@@ -2025,7 +2009,7 @@ mod tests {
20252009 use crate :: arrow:: schema:: parquet_to_arrow_schema;
20262010 use crate :: basic:: { Encoding , Type as PhysicalType } ;
20272011 use crate :: column:: page:: { Page , PageReader } ;
2028- use crate :: data_type:: { ByteArray , DataType , Int32Type , Int64Type } ;
2012+ use crate :: data_type:: { ByteArray , ByteArrayType , DataType , Int32Type , Int64Type } ;
20292013 use crate :: errors:: Result ;
20302014 use crate :: file:: reader:: { FileReader , SerializedFileReader } ;
20312015 use crate :: schema:: parser:: parse_message_type;
0 commit comments