@@ -56,11 +56,10 @@ use arrow::datatypes::{
5656use  arrow:: util:: bit_util; 
5757
5858use  crate :: arrow:: converter:: { 
59-     BinaryArrayConverter ,  BinaryConverter ,  Converter ,  DecimalArrayConverter , 
60-     DecimalConverter ,  FixedLenBinaryConverter ,  FixedSizeArrayConverter , 
61-     Int96ArrayConverter ,  Int96Converter ,  IntervalDayTimeArrayConverter , 
62-     IntervalDayTimeConverter ,  IntervalYearMonthArrayConverter , 
63-     IntervalYearMonthConverter ,  Utf8ArrayConverter ,  Utf8Converter , 
59+     Converter ,  DecimalArrayConverter ,  DecimalConverter ,  FixedLenBinaryConverter , 
60+     FixedSizeArrayConverter ,  Int96ArrayConverter ,  Int96Converter , 
61+     IntervalDayTimeArrayConverter ,  IntervalDayTimeConverter , 
62+     IntervalYearMonthArrayConverter ,  IntervalYearMonthConverter , 
6463} ; 
6564use  crate :: arrow:: record_reader:: buffer:: { ScalarValue ,  ValuesBuffer } ; 
6665use  crate :: arrow:: record_reader:: { GenericRecordReader ,  RecordReader } ; 
@@ -70,8 +69,8 @@ use crate::column::page::PageIterator;
7069use  crate :: column:: reader:: decoder:: ColumnValueDecoder ; 
7170use  crate :: column:: reader:: ColumnReaderImpl ; 
7271use  crate :: data_type:: { 
73-     BoolType ,  ByteArrayType ,   DataType ,  DoubleType ,  FixedLenByteArrayType ,  FloatType , 
74-     Int32Type ,   Int64Type ,  Int96Type , 
72+     BoolType ,  DataType ,  DoubleType ,  FixedLenByteArrayType ,  FloatType ,   Int32Type , 
73+     Int64Type ,  Int96Type , 
7574} ; 
7675use  crate :: errors:: { ParquetError ,  ParquetError :: ArrowError ,  Result } ; 
7776use  crate :: file:: reader:: { FilePageIterator ,  FileReader } ; 
@@ -81,9 +80,12 @@ use crate::schema::types::{
8180use  crate :: schema:: visitor:: TypeVisitor ; 
8281
8382mod  byte_array; 
83+ mod  byte_array_dictionary; 
84+ mod  dictionary_buffer; 
8485mod  offset_buffer; 
8586
8687pub  use  byte_array:: make_byte_array_reader; 
88+ pub  use  byte_array_dictionary:: make_byte_array_dictionary_reader; 
8789
8890/// Array reader reads parquet data into arrow array. 
8991pub  trait  ArrayReader  { 
@@ -271,7 +273,8 @@ where
271273                . clone ( ) , 
272274        } ; 
273275
274-         let  record_reader = RecordReader :: < T > :: new_with_options ( column_desc. clone ( ) ,  null_mask_only) ; 
276+         let  record_reader =
277+             RecordReader :: < T > :: new_with_options ( column_desc. clone ( ) ,  null_mask_only) ; 
275278
276279        Ok ( Self  { 
277280            data_type, 
@@ -1783,35 +1786,12 @@ impl<'a> ArrayReaderBuilder {
17831786                ) ?, 
17841787            ) ) , 
17851788            PhysicalType :: BYTE_ARRAY  => match  arrow_type { 
1786-                 // TODO: Replace with optimised dictionary reader (#171) 
1787-                 Some ( ArrowType :: Dictionary ( _,  _) )  => { 
1788-                     match  cur_type. get_basic_info ( ) . converted_type ( )  { 
1789-                         ConvertedType :: UTF8  => { 
1790-                             let  converter = Utf8Converter :: new ( Utf8ArrayConverter  { } ) ; 
1791-                             Ok ( Box :: new ( ComplexObjectArrayReader :: < 
1792-                                 ByteArrayType , 
1793-                                 Utf8Converter , 
1794-                             > :: new ( 
1795-                                 page_iterator, 
1796-                                 column_desc, 
1797-                                 converter, 
1798-                                 arrow_type, 
1799-                             ) ?) ) 
1800-                         } 
1801-                         _ => { 
1802-                             let  converter = BinaryConverter :: new ( BinaryArrayConverter  { } ) ; 
1803-                             Ok ( Box :: new ( ComplexObjectArrayReader :: < 
1804-                                 ByteArrayType , 
1805-                                 BinaryConverter , 
1806-                             > :: new ( 
1807-                                 page_iterator, 
1808-                                 column_desc, 
1809-                                 converter, 
1810-                                 arrow_type, 
1811-                             ) ?) ) 
1812-                         } 
1813-                     } 
1814-                 } 
1789+                 Some ( ArrowType :: Dictionary ( _,  _) )  => make_byte_array_dictionary_reader ( 
1790+                     page_iterator, 
1791+                     column_desc, 
1792+                     arrow_type, 
1793+                     null_mask_only, 
1794+                 ) , 
18151795                _ => make_byte_array_reader ( 
18161796                    page_iterator, 
18171797                    column_desc, 
@@ -2025,7 +2005,7 @@ mod tests {
20252005    use  crate :: arrow:: schema:: parquet_to_arrow_schema; 
20262006    use  crate :: basic:: { Encoding ,  Type  as  PhysicalType } ; 
20272007    use  crate :: column:: page:: { Page ,  PageReader } ; 
2028-     use  crate :: data_type:: { ByteArray ,  DataType ,  Int32Type ,  Int64Type } ; 
2008+     use  crate :: data_type:: { ByteArray ,  ByteArrayType ,   DataType ,  Int32Type ,  Int64Type } ; 
20292009    use  crate :: errors:: Result ; 
20302010    use  crate :: file:: reader:: { FileReader ,  SerializedFileReader } ; 
20312011    use  crate :: schema:: parser:: parse_message_type; 
0 commit comments