Skip to content

Commit 3c44c15

Browse files
committed
fix null count stats computation (apache#18276)
1 parent cd065ee commit 3c44c15

File tree

2 files changed

+35
-7
lines changed

2 files changed

+35
-7
lines changed

datafusion/datasource-parquet/src/metadata.rs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,6 @@ impl<'a> DFParquetMetadata<'a> {
297297
summarize_min_max_null_counts(
298298
&mut accumulators,
299299
idx,
300-
num_rows,
301300
&stats_converter,
302301
row_groups_metadata,
303302
)
@@ -415,7 +414,6 @@ struct StatisticsAccumulators<'a> {
415414
fn summarize_min_max_null_counts(
416415
accumulators: &mut StatisticsAccumulators,
417416
arrow_schema_index: usize,
418-
num_rows: usize,
419417
stats_converter: &StatisticsConverter,
420418
row_groups_metadata: &[RowGroupMetaData],
421419
) -> Result<()> {
@@ -447,11 +445,14 @@ fn summarize_min_max_null_counts(
447445
);
448446
}
449447

450-
accumulators.null_counts_array[arrow_schema_index] =
451-
Precision::Exact(match sum(&null_counts) {
452-
Some(null_count) => null_count as usize,
453-
None => num_rows,
454-
});
448+
accumulators.null_counts_array[arrow_schema_index] = match sum(&null_counts) {
449+
Some(null_count) => Precision::Exact(null_count as usize),
450+
None => match null_counts.len() {
451+
// sum() returns None when the null-count array is empty (no row groups) or every entry is null (statistics missing)
452+
0 => Precision::Exact(0),
453+
_ => Precision::Absent,
454+
},
455+
};
455456

456457
Ok(())
457458
}

datafusion/sqllogictest/test_files/parquet.slt

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -862,3 +862,30 @@ select part, k, v from t order by k
862862

863863
statement ok
864864
DROP TABLE t;
865+
866+
# Regression test for files with stats on some columns and not others
867+
# See https://github.com/apache/datafusion/pull/18276
868+
869+
query I
870+
COPY (SELECT 1::int AS a, 2::int as b)
871+
TO 'test_files/scratch/parquet/mixed_stats.parquet'
872+
STORED AS PARQUET OPTIONS (
873+
'STATISTICS_ENABLED::b' 'none'
874+
);
875+
----
876+
1
877+
878+
statement ok
879+
CREATE EXTERNAL TABLE t
880+
STORED AS PARQUET
881+
LOCATION 'test_files/scratch/parquet/mixed_stats.parquet';
882+
883+
query I
884+
SELECT b
885+
FROM t
886+
WHERE b = 2;
887+
----
888+
2
889+
890+
statement ok
891+
DROP TABLE t;

0 commit comments

Comments
 (0)