@@ -247,6 +247,17 @@ where
247247 pages : Box < dyn PageIterator > ,
248248 column_desc : ColumnDescPtr ,
249249 arrow_type : Option < ArrowType > ,
250+ ) -> Result < Self > {
251+ Self :: new_with_options ( pages, column_desc, arrow_type, false )
252+ }
253+
254+ /// Construct a primitive array reader; when `null_mask_only` is true, only the null
255+ /// mask is computed and level data is not buffered
256+ pub fn new_with_options (
257+ pages : Box < dyn PageIterator > ,
258+ column_desc : ColumnDescPtr ,
259+ arrow_type : Option < ArrowType > ,
260+ null_mask_only : bool ,
250261 ) -> Result < Self > {
251262 // Check if Arrow type is specified, else create it from Parquet type
252263 let data_type = match arrow_type {
@@ -256,7 +267,7 @@ where
256267 . clone ( ) ,
257268 } ;
258269
259- let record_reader = RecordReader :: < T > :: new ( column_desc. clone ( ) ) ;
270+ let record_reader = RecordReader :: < T > :: new_with_options ( column_desc. clone ( ) , null_mask_only ) ;
260271
261272 Ok ( Self {
262273 data_type,
@@ -1350,19 +1361,26 @@ impl<'a> TypeVisitor<Option<Box<dyn ArrayReader>>, &'a ArrayReaderBuilderContext
13501361 let mut new_context = context. clone ( ) ;
13511362 new_context. path . append ( vec ! [ cur_type. name( ) . to_string( ) ] ) ;
13521363
1353- match cur_type. get_basic_info ( ) . repetition ( ) {
1364+ let null_mask_only = match cur_type. get_basic_info ( ) . repetition ( ) {
13541365 Repetition :: REPEATED => {
13551366 new_context. def_level += 1 ;
13561367 new_context. rep_level += 1 ;
1368+ false
13571369 }
13581370 Repetition :: OPTIONAL => {
13591371 new_context. def_level += 1 ;
1372+
1373+ // Can just compute the null mask if no ancestor contributes a definition or repetition level
1374+ context. def_level == 0 && context. rep_level == 0
13601375 }
1361- _ => ( ) ,
1362- }
1376+ _ => false ,
1377+ } ;
13631378
1364- let reader =
1365- self . build_for_primitive_type_inner ( cur_type. clone ( ) , & new_context) ?;
1379+ let reader = self . build_for_primitive_type_inner (
1380+ cur_type. clone ( ) ,
1381+ & new_context,
1382+ null_mask_only,
1383+ ) ?;
13661384
13671385 if cur_type. get_basic_info ( ) . repetition ( ) == Repetition :: REPEATED {
13681386 Err ( ArrowError (
@@ -1641,6 +1659,7 @@ impl<'a> ArrayReaderBuilder {
16411659 & self ,
16421660 cur_type : TypePtr ,
16431661 context : & ' a ArrayReaderBuilderContext ,
1662+ null_mask_only : bool ,
16441663 ) -> Result < Box < dyn ArrayReader > > {
16451664 let column_desc = Arc :: new ( ColumnDescriptor :: new (
16461665 cur_type. clone ( ) ,
@@ -1658,30 +1677,39 @@ impl<'a> ArrayReaderBuilder {
16581677 . map ( |f| f. data_type ( ) . clone ( ) ) ;
16591678
16601679 match cur_type. get_physical_type ( ) {
1661- PhysicalType :: BOOLEAN => Ok ( Box :: new ( PrimitiveArrayReader :: < BoolType > :: new (
1662- page_iterator,
1663- column_desc,
1664- arrow_type,
1665- ) ?) ) ,
1680+ PhysicalType :: BOOLEAN => Ok ( Box :: new (
1681+ PrimitiveArrayReader :: < BoolType > :: new_with_options (
1682+ page_iterator,
1683+ column_desc,
1684+ arrow_type,
1685+ null_mask_only,
1686+ ) ?,
1687+ ) ) ,
16661688 PhysicalType :: INT32 => {
16671689 if let Some ( ArrowType :: Null ) = arrow_type {
16681690 Ok ( Box :: new ( NullArrayReader :: < Int32Type > :: new (
16691691 page_iterator,
16701692 column_desc,
16711693 ) ?) )
16721694 } else {
1673- Ok ( Box :: new ( PrimitiveArrayReader :: < Int32Type > :: new (
1674- page_iterator,
1675- column_desc,
1676- arrow_type,
1677- ) ?) )
1695+ Ok ( Box :: new (
1696+ PrimitiveArrayReader :: < Int32Type > :: new_with_options (
1697+ page_iterator,
1698+ column_desc,
1699+ arrow_type,
1700+ null_mask_only,
1701+ ) ?,
1702+ ) )
16781703 }
16791704 }
1680- PhysicalType :: INT64 => Ok ( Box :: new ( PrimitiveArrayReader :: < Int64Type > :: new (
1681- page_iterator,
1682- column_desc,
1683- arrow_type,
1684- ) ?) ) ,
1705+ PhysicalType :: INT64 => Ok ( Box :: new (
1706+ PrimitiveArrayReader :: < Int64Type > :: new_with_options (
1707+ page_iterator,
1708+ column_desc,
1709+ arrow_type,
1710+ null_mask_only,
1711+ ) ?,
1712+ ) ) ,
16851713 PhysicalType :: INT96 => {
16861714 // get the optional timezone information from arrow type
16871715 let timezone = arrow_type
@@ -1705,18 +1733,22 @@ impl<'a> ArrayReaderBuilder {
17051733 arrow_type,
17061734 ) ?) )
17071735 }
1708- PhysicalType :: FLOAT => Ok ( Box :: new ( PrimitiveArrayReader :: < FloatType > :: new (
1709- page_iterator,
1710- column_desc,
1711- arrow_type,
1712- ) ?) ) ,
1713- PhysicalType :: DOUBLE => {
1714- Ok ( Box :: new ( PrimitiveArrayReader :: < DoubleType > :: new (
1736+ PhysicalType :: FLOAT => Ok ( Box :: new (
1737+ PrimitiveArrayReader :: < FloatType > :: new_with_options (
17151738 page_iterator,
17161739 column_desc,
17171740 arrow_type,
1718- ) ?) )
1719- }
1741+ null_mask_only,
1742+ ) ?,
1743+ ) ) ,
1744+ PhysicalType :: DOUBLE => Ok ( Box :: new (
1745+ PrimitiveArrayReader :: < DoubleType > :: new_with_options (
1746+ page_iterator,
1747+ column_desc,
1748+ arrow_type,
1749+ null_mask_only,
1750+ ) ?,
1751+ ) ) ,
17201752 PhysicalType :: BYTE_ARRAY => {
17211753 if cur_type. get_basic_info ( ) . converted_type ( ) == ConvertedType :: UTF8 {
17221754 if let Some ( ArrowType :: LargeUtf8 ) = arrow_type {
0 commit comments