pola-rs · ritchie46 · Oct 16, 2024 · Oct 15, 2024 · Oct 15, 2024 · Oct 15, 2024
@@ -525,14 +525,6 @@ fn apply_multiple_elementwise<'a>(
 impl StatsEvaluator for ApplyExpr {
     fn should_read(&self, stats: &BatchStats) -> PolarsResult<bool> {
         let read = self.should_read_impl(stats)?;
-        if ExecutionState::new().verbose() {
-            if read {
-                eprintln!("parquet file must be read, statistics not sufficient for predicate.")
-            } else {
-                eprintln!("parquet file can be skipped, the statistics were sufficient to apply the predicate.")
-            }
-        }
-
         Ok(read)
     }
 }

@@ -354,7 +354,7 @@ mod stats {
         use ChunkCompareIneq as C;
         match op {
             Operator::Eq => apply_operator_stats_eq(min_max, literal),
-            Operator::NotEq => apply_operator_stats_eq(min_max, literal),
+            Operator::NotEq => apply_operator_stats_neq(min_max, literal),
             Operator::Gt => {
                 // Literal is bigger than max value, selection needs all rows.
                 C::gt(literal, min_max).map(|ca| ca.any()).unwrap_or(false)
@@ -457,10 +457,6 @@ mod stats {
 
     impl StatsEvaluator for BinaryExpr {
         fn should_read(&self, stats: &BatchStats) -> PolarsResult<bool> {
-            if std::env::var("POLARS_NO_PARQUET_STATISTICS").is_ok() {
-                return Ok(true);
-            }
-
             use Operator::*;
             match (
                 self.left.as_stats_evaluator(),

@@ -1,3 +1,4 @@
+use polars_core::config;
 use polars_core::prelude::*;
 use polars_parquet::read::statistics::{deserialize, Statistics};
 use polars_parquet::read::RowGroupMetadata;
@@ -50,18 +51,36 @@ pub fn read_this_row_group(
     md: &RowGroupMetadata,
     schema: &ArrowSchema,
 ) -> PolarsResult<bool> {
+    if std::env::var("POLARS_NO_PARQUET_STATISTICS").is_ok() {
+        return Ok(true);
+    }
+
+    let mut should_read = true;
+
     if let Some(pred) = predicate {
         if let Some(pred) = pred.as_stats_evaluator() {
             if let Some(stats) = collect_statistics(md, schema)? {
-                let should_read = pred.should_read(&stats);
+                let pred_result = pred.should_read(&stats);
+
                 // a parquet file may not have statistics of all columns
-                if matches!(should_read, Ok(false)) {
-                    return Ok(false);
-                } else if !matches!(should_read, Err(PolarsError::ColumnNotFound(_))) {
-                    let _ = should_read?;
+                match pred_result {
+                    Err(PolarsError::ColumnNotFound(errstr)) => {
+                        return Err(PolarsError::ColumnNotFound(errstr))
+                    },
+                    Ok(false) => should_read = false,
+                    _ => {},
                 }
             }
         }
     }
-    Ok(true)
+
+    if config::verbose() {
+        if should_read {
+            eprintln!("parquet row group must be read, statistics not sufficient for predicate.");
+        } else {
+            eprintln!("parquet row group can be skipped, the statistics were sufficient to apply the predicate.");
+        }
+    }
+
+    Ok(should_read)
 }