Do not push down a filter if it contains an `Unevaluable` expression

HyukjinKwon · HyukjinKwon · commit 9677b59bba82 · 2024-07-12T12:42:06.000+09:00
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala
@@ -67,7 +67,7 @@ trait OperationHelper extends AliasHelper with PredicateHelper {
           empty
         }
 
-      case Filter(condition, child) =>
+      case Filter(condition, child) if !condition.exists(_.isInstanceOf[Unevaluable]) =>
         val (fields, filters, other, aliases) = collectProjectsAndFilters(child, alwaysInline)
         // When collecting projects and filters, we effectively push down filters through
         // projects. We need to meet the following conditions to do so:
@@ -115,6 +115,8 @@ object PhysicalOperation extends OperationHelper {
     val (fields, filters, child, _) = collectProjectsAndFilters(plan, alwaysInline)
     // If more than 2 filters are collected, they must all be deterministic.
     if (filters.length > 1) assert(filters.forall(_.deterministic))
+    // Filters containing Unevaluable expressions must never be pushed down.
+    assert(filters.forall(!_.exists(_.isInstanceOf[Unevaluable])))
     Some((
       fields.getOrElse(child.output),
       filters.flatMap(splitConjunctivePredicates),
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDFSuite.scala
@@ -17,8 +17,8 @@
 
 package org.apache.spark.sql.execution.python
 
-import org.apache.spark.sql.{AnalysisException, IntegratedUDFTestUtils, QueryTest}
-import org.apache.spark.sql.functions.{array, count, transform}
+import org.apache.spark.sql.{AnalysisException, IntegratedUDFTestUtils, QueryTest, Row}
+import org.apache.spark.sql.functions.{array, col, count, transform}
 import org.apache.spark.sql.test.SharedSparkSession
 import org.apache.spark.sql.types.LongType
 
@@ -124,4 +124,16 @@ class PythonUDFSuite extends QueryTest with SharedSparkSession {
       context = ExpectedContext(
         "transform", s".*${this.getClass.getSimpleName}.*"))
   }
+
+  test("SPARK-48666: Python UDF execution against partitioned column") {
+    assume(shouldTestPythonUDFs)
+    withTable("t") {
+      spark.range(1).selectExpr("id AS t", "(id + 1) AS p").write.partitionBy("p").saveAsTable("t")
+      val table = spark.table("t")
+      val newTable = table.withColumn("new_column", pythonTestUDF(table("p")))
+      val df = newTable.as("t1").join(
+        newTable.as("t2"), col("t1.new_column") === col("t2.new_column"))
+      checkAnswer(df, Row(0, 1, 1, 0, 1, 1))
+    }
+  }
 }