Skip to content

Commit 1333f3f

Browse files
authored
perf: Use specialized decoding for all predicates for Parquet dictionary encoding (#24403)
1 parent 20d7aeb commit 1333f3f

File tree

8 files changed

+27
-20
lines changed

8 files changed

+27
-20
lines changed

crates/polars-parquet/src/arrow/read/deserialize/binview/mod.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -496,7 +496,7 @@ impl utils::Decoder for BinViewDecoder {
496496
fn evaluate_predicate(
497497
&mut self,
498498
state: &utils::State<'_, Self>,
499-
predicate: &SpecializedParquetColumnExpr,
499+
predicate: Option<&SpecializedParquetColumnExpr>,
500500
pred_true_mask: &mut BitmapBuilder,
501501
dict_mask: Option<&Bitmap>,
502502
) -> ParquetResult<bool> {
@@ -515,6 +515,10 @@ impl utils::Decoder for BinViewDecoder {
515515
return Ok(true);
516516
}
517517

518+
let Some(predicate) = predicate else {
519+
return Ok(false);
520+
};
521+
518522
use {SpecializedParquetColumnExpr as Spce, StateTranslation as St};
519523
match (&state.translation, predicate) {
520524
(St::Plain(iter), Spce::Equal(needle)) => {

crates/polars-parquet/src/arrow/read/deserialize/boolean.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ impl Decoder for BooleanDecoder {
330330
fn evaluate_predicate(
331331
&mut self,
332332
_state: &utils::State<'_, Self>,
333-
_predicate: &SpecializedParquetColumnExpr,
333+
_predicate: Option<&SpecializedParquetColumnExpr>,
334334
_pred_true_mask: &mut BitmapBuilder,
335335
_dict_mask: Option<&Bitmap>,
336336
) -> ParquetResult<bool> {

crates/polars-parquet/src/arrow/read/deserialize/categorical.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ impl<T: DictionaryKey + IndexMapping<Output = T::AlignedBytes>> utils::Decoder
7979
fn evaluate_predicate(
8080
&mut self,
8181
state: &utils::State<'_, Self>,
82-
_predicate: &SpecializedParquetColumnExpr,
82+
_predicate: Option<&SpecializedParquetColumnExpr>,
8383
pred_true_mask: &mut BitmapBuilder,
8484
dict_mask: Option<&Bitmap>,
8585
) -> ParquetResult<bool> {

crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -548,7 +548,7 @@ impl Decoder for BinaryDecoder {
548548
fn evaluate_predicate(
549549
&mut self,
550550
_state: &utils::State<'_, Self>,
551-
_predicate: &SpecializedParquetColumnExpr,
551+
_predicate: Option<&SpecializedParquetColumnExpr>,
552552
_pred_true_mask: &mut BitmapBuilder,
553553
_dict_mask: Option<&Bitmap>,
554554
) -> ParquetResult<bool> {

crates/polars-parquet/src/arrow/read/deserialize/null.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ impl utils::Decoder for NullDecoder {
7373
fn evaluate_predicate(
7474
&mut self,
7575
_state: &utils::State<'_, Self>,
76-
_predicate: &SpecializedParquetColumnExpr,
76+
_predicate: Option<&SpecializedParquetColumnExpr>,
7777
_pred_true_mask: &mut BitmapBuilder,
7878
_dict_mask: Option<&Bitmap>,
7979
) -> ParquetResult<bool> {

crates/polars-parquet/src/arrow/read/deserialize/primitive/float.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ where
164164
fn evaluate_predicate(
165165
&mut self,
166166
state: &utils::State<'_, Self>,
167-
_predicate: &SpecializedParquetColumnExpr,
167+
_predicate: Option<&SpecializedParquetColumnExpr>,
168168
pred_true_mask: &mut BitmapBuilder,
169169
dict_mask: Option<&Bitmap>,
170170
) -> ParquetResult<bool> {

crates/polars-parquet/src/arrow/read/deserialize/primitive/integer.rs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -193,10 +193,15 @@ where
193193
fn evaluate_predicate(
194194
&mut self,
195195
state: &utils::State<'_, Self>,
196-
predicate: &SpecializedParquetColumnExpr,
196+
predicate: Option<&SpecializedParquetColumnExpr>,
197197
pred_true_mask: &mut BitmapBuilder,
198198
dict_mask: Option<&Bitmap>,
199199
) -> ParquetResult<bool> {
200+
// @Performance: This should be added
201+
if state.page_validity.is_some() {
202+
return Ok(false);
203+
}
204+
200205
if let StateTranslation::Dictionary(values) = &state.translation {
201206
let dict_mask = dict_mask.unwrap();
202207
super::super::dictionary_encoded::predicate::decode(
@@ -211,10 +216,9 @@ where
211216
return Ok(false);
212217
}
213218

214-
// @Performance: This should be added
215-
if state.page_validity.is_some() {
219+
let Some(predicate) = predicate else {
216220
return Ok(false);
217-
}
221+
};
218222

219223
use SpecializedParquetColumnExpr as S;
220224
match (&state.translation, predicate) {

crates/polars-parquet/src/arrow/read/deserialize/utils/mod.rs

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ pub(super) trait Decoder: Sized {
319319
fn evaluate_predicate(
320320
&mut self,
321321
state: &State<'_, Self>,
322-
predicate: &SpecializedParquetColumnExpr,
322+
predicate: Option<&SpecializedParquetColumnExpr>,
323323
pred_true_mask: &mut BitmapBuilder,
324324
dict_mask: Option<&Bitmap>,
325325
) -> ParquetResult<bool>;
@@ -613,14 +613,12 @@ impl<D: Decoder> PageDecoder<D> {
613613
// handled in the kernels. If it cannot be handled in the kernels, catch it here
614614
// and load it as if it weren't filtered.
615615
let mut page_ptm = BitmapBuilder::new();
616-
if let Some(specialized_pred) = specialized_pred
617-
&& self.decoder.evaluate_predicate(
618-
&state,
619-
specialized_pred,
620-
&mut page_ptm,
621-
dict_mask.as_ref(),
622-
)?
623-
{
616+
if self.decoder.evaluate_predicate(
617+
&state,
618+
specialized_pred,
619+
&mut page_ptm,
620+
dict_mask.as_ref(),
621+
)? {
624622
let num_filtered_values = page_ptm.set_bits();
625623
if page_ptm.set_bits() == 0 {
626624
pred_true_mask.extend_constant(page_ptm.len(), false);
@@ -652,7 +650,8 @@ impl<D: Decoder> PageDecoder<D> {
652650
pred_true_mask.extend_from_bitmap(&page_ptm);
653651

654652
if p.include_values {
655-
if let SpecializedParquetColumnExpr::Equal(needle) = specialized_pred {
653+
if let Some(SpecializedParquetColumnExpr::Equal(needle)) = specialized_pred
654+
{
656655
self.decoder.extend_constant(
657656
&mut target,
658657
num_filtered_values,

0 commit comments

Comments
 (0)