Skip to content

Commit dc7535a

Browse files
authored
Support FixedSizeBinaryArray Parquet Data Page Statistics (#11200)
* failing add data page stats for fixed size binary array
* fix
* failing fix
* fix
1 parent 03848c5 commit dc7535a

File tree

2 files changed

+28
-2
lines changed

2 files changed

+28
-2
lines changed

datafusion/core/src/datasource/physical_plan/parquet/statistics.rs

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -726,6 +726,20 @@ make_data_page_stats_iterator!(
726726
ByteArray
727727
);
728728

729+
make_data_page_stats_iterator!(
730+
MaxFixedLenByteArrayDataPageStatsIterator,
731+
|x: &PageIndex<FixedLenByteArray>| { x.max.clone() },
732+
Index::FIXED_LEN_BYTE_ARRAY,
733+
FixedLenByteArray
734+
);
735+
736+
make_data_page_stats_iterator!(
737+
MinFixedLenByteArrayDataPageStatsIterator,
738+
|x: &PageIndex<FixedLenByteArray>| { x.min.clone() },
739+
Index::FIXED_LEN_BYTE_ARRAY,
740+
FixedLenByteArray
741+
);
742+
729743
macro_rules! get_data_page_statistics {
730744
($stat_type_prefix: ident, $data_type: ident, $iterator: ident) => {
731745
paste! {
@@ -903,7 +917,19 @@ macro_rules! get_data_page_statistics {
903917
new_empty_array(&DataType::Time64(unit.clone()))
904918
}
905919
})
906-
}
920+
},
921+
Some(DataType::FixedSizeBinary(size)) => {
922+
Ok(Arc::new(
923+
FixedSizeBinaryArray::try_from_iter(
924+
[<$stat_type_prefix FixedLenByteArrayDataPageStatsIterator>]::new($iterator)
925+
.flat_map(|x| x.into_iter())
926+
.filter_map(|x| x)
927+
).unwrap_or_else(|e| {
928+
log::debug!("FixedSizeBinary statistics is invalid: {}", e);
929+
FixedSizeBinaryArray::new(*size, vec![].into(), None)
930+
})
931+
))
932+
},
907933
_ => unimplemented!()
908934
}
909935
}

datafusion/core/tests/parquet/arrow_statistics.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1861,7 +1861,7 @@ async fn test_byte() {
18611861
expected_null_counts: UInt64Array::from(vec![0, 0, 0]),
18621862
expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])),
18631863
column_name: "service_fixedsize",
1864-
check: Check::RowGroup,
1864+
check: Check::Both,
18651865
}
18661866
.run();
18671867

0 commit comments

Comments
 (0)