Skip to content

Commit 4c840e3

Browse files
wangyumsomani
authored and
GitHub Enterprise
committed
[CARMEL-5868] Backport SPARK-37199: Add deterministic field to QueryPlan (#879)
* [SPARK-37199][SQL] Add deterministic field to QueryPlan ### What changes were proposed in this pull request? We have a deterministic field in Expressions to check if an expression is deterministic, but we do not have a similar field in QueryPlan. We have a need for such a check in the QueryPlan sometimes, like in InlineCTE This proposal is to add a deterministic field to QueryPlan. More details in this document: https://docs.google.com/document/d/1eIiaSJf-Co2HhjsaQxFNGwUxobnHID4ZGmJMcVytREc/edit#heading=h.4cz970y1mk93 ### Why are the changes needed? We have a need for such a check in the QueryPlan sometimes, like in InlineCTE ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added unit tests Closes #34470 from somani/isDeterministic. Authored-by: Abhishek Somani <abhishek.somani@databricks.com> Signed-off-by: Wenchen Fan <wenchen@databricks.com> (cherry picked from commit fe41d18) * Fix test error Co-authored-by: Abhishek Somani <abhishek.somani@databricks.com>
1 parent c9524ee commit 4c840e3

File tree

4 files changed

+50
-1
lines changed

4 files changed

+50
-1
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ import org.apache.spark.sql.types._
2828
* An interface for expressions that contain a [[QueryPlan]].
2929
*/
3030
abstract class PlanExpression[T <: QueryPlan[_]] extends Expression {
31+
32+
override lazy val deterministic: Boolean = children.forall(_.deterministic) &&
33+
plan.deterministic
34+
3135
/** The id of the subquery expression. */
3236
def exprId: ExprId
3337

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,13 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]]
6666
AttributeSet.fromAttributeSets(expressions.map(_.references)) -- producedAttributes
6767
}
6868

69+
/**
70+
* Returns true when the all the expressions in the current node as well as all of its children
71+
* are deterministic
72+
*/
73+
lazy val deterministic: Boolean = expressions.forall(_.deterministic) &&
74+
children.forall(_.deterministic)
75+
6976
/**
7077
* Attributes that are referenced by expressions but not provided by this node's children.
7178
*/

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/QueryPlanSuite.scala

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ import org.apache.spark.SparkFunSuite
2121
import org.apache.spark.sql.catalyst.TableIdentifier
2222
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
2323
import org.apache.spark.sql.catalyst.dsl.plans
24-
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, ListQuery, Literal, NamedExpression}
24+
import org.apache.spark.sql.catalyst.expressions.{Add, Alias, AttributeReference, Expression, ListQuery, Literal, NamedExpression, Rand}
2525
import org.apache.spark.sql.catalyst.plans.logical.{Filter, Project, Union}
2626
import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, Origin}
2727
import org.apache.spark.sql.types.IntegerType
@@ -83,4 +83,32 @@ class QueryPlanSuite extends SparkFunSuite {
8383
assert(countRelationsInPlan == 2)
8484
assert(countRelationsInPlanAndSubqueries == 5)
8585
}
86+
87+
test("SPARK-37199: add a deterministic field to QueryPlan") {
88+
val a: NamedExpression = AttributeReference("a", IntegerType)()
89+
val aRand: NamedExpression = Alias(Add(a, Rand(1)), "aRand")()
90+
val deterministicPlan = Project(
91+
Seq(a),
92+
Filter(
93+
ListQuery(Project(
94+
Seq(a),
95+
UnresolvedRelation(TableIdentifier("t", None))
96+
)),
97+
UnresolvedRelation(TableIdentifier("t", None))
98+
)
99+
)
100+
assert(deterministicPlan.deterministic)
101+
102+
val nonDeterministicPlan = Project(
103+
Seq(aRand),
104+
Filter(
105+
ListQuery(Project(
106+
Seq(a),
107+
UnresolvedRelation(TableIdentifier("t", None))
108+
)),
109+
UnresolvedRelation(TableIdentifier("t", None))
110+
)
111+
)
112+
assert(!nonDeterministicPlan.deterministic)
113+
}
86114
}

sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1753,4 +1753,14 @@ class SubquerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
17531753
}
17541754
}
17551755
}
1756+
1757+
test("SPARK-37199: deterministic in QueryPlan considers subquery") {
1758+
val deterministicQueryPlan = sql("select (select 1 as b) as b")
1759+
.queryExecution.executedPlan
1760+
assert(deterministicQueryPlan.deterministic)
1761+
1762+
val nonDeterministicQueryPlan = sql("select (select rand(1) as b) as b")
1763+
.queryExecution.executedPlan
1764+
assert(!nonDeterministicQueryPlan.deterministic)
1765+
}
17561766
}

0 commit comments

Comments
 (0)