Skip to content

Commit 7505e0e

Browse files
skip optimization for map
1 parent 09ae668 commit 7505e0e

File tree

2 files changed

+21
-3
lines changed

2 files changed

+21
-3
lines changed

datafusion/datasource-parquet/src/opener.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,13 @@ use datafusion_execution::parquet_encryption::EncryptionFactory;
6060
use futures::{Stream, StreamExt, ready};
6161
use log::debug;
6262
use parquet::DecodeResult;
63+
use parquet::arrow::ParquetRecordBatchStreamBuilder;
6364
use parquet::arrow::arrow_reader::metrics::ArrowReaderMetrics;
6465
use parquet::arrow::arrow_reader::{
6566
ArrowReaderMetadata, ArrowReaderOptions, RowSelectionPolicy,
6667
};
6768
use parquet::arrow::async_reader::AsyncFileReader;
6869
use parquet::arrow::push_decoder::{ParquetPushDecoder, ParquetPushDecoderBuilder};
69-
use parquet::arrow::ParquetRecordBatchStreamBuilder;
7070
use parquet::file::metadata::{PageIndexPolicy, ParquetMetaDataReader};
7171

7272
/// Implements [`FileOpener`] for a parquet file

datafusion/datasource-parquet/src/row_filter.rs

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -424,10 +424,28 @@ impl TreeNodeVisitor<'_> for PushdownChecker<'_> {
424424
.first()
425425
.and_then(|a| a.as_any().downcast_ref::<Column>())
426426
{
427+
// For Map columns, get_field performs a runtime key lookup
428+
// rather than a schema-level field access. The entire Map
429+
// column must be read, so skip the struct field optimization
430+
// and let normal Column traversal handle it via
431+
// check_single_column.
432+
let is_map_column = self
433+
.file_schema
434+
.index_of(column.name())
435+
.ok()
436+
.map(|idx| {
437+
matches!(
438+
self.file_schema.field(idx).data_type(),
439+
DataType::Map(_, _)
440+
)
441+
})
442+
.unwrap_or(false);
443+
427444
let return_type = func.return_type();
428445

429-
if !DataType::is_nested(return_type)
430-
|| self.is_nested_type_supported(return_type)
446+
if !is_map_column
447+
&& (!DataType::is_nested(return_type)
448+
|| self.is_nested_type_supported(return_type))
431449
{
432450
// try to resolve all field name arguments to string literals
433451
// if any argument is not a string literal, we can not determine the exact

0 commit comments

Comments
 (0)