Skip to content

Commit 281c1ca

Browse files
mgaido91cloud-fan
authored andcommitted
[SPARK-23973][SQL] Remove consecutive Sorts
## What changes were proposed in this pull request? In SPARK-23375 we introduced the ability of removing `Sort` operation during query optimization if the data is already sorted. In this follow-up we remove also a `Sort` which is followed by another `Sort`: in this case the first sort is not needed and can be safely removed. The PR starts from henryr's comment: #20560 (comment). So credit should be given to him. ## How was this patch tested? added UT Author: Marco Gaido <marcogaido91@gmail.com> Closes #21072 from mgaido91/SPARK-23973.
1 parent 428b903 commit 281c1ca

File tree

2 files changed

+63
-9
lines changed

2 files changed

+63
-9
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -767,12 +767,29 @@ object EliminateSorts extends Rule[LogicalPlan] {
767767
}
768768

769769
/**
770-
* Removes Sort operation if the child is already sorted
770+
* Removes redundant Sort operation. This can happen:
771+
* 1) if the child is already sorted
772+
* 2) if there is another Sort operator separated by 0...n Project/Filter operators
771773
*/
772774
object RemoveRedundantSorts extends Rule[LogicalPlan] {
773-
def apply(plan: LogicalPlan): LogicalPlan = plan transform {
775+
def apply(plan: LogicalPlan): LogicalPlan = plan transformDown {
774776
case Sort(orders, true, child) if SortOrder.orderingSatisfies(child.outputOrdering, orders) =>
775777
child
778+
case s @ Sort(_, _, child) => s.copy(child = recursiveRemoveSort(child))
779+
}
780+
781+
def recursiveRemoveSort(plan: LogicalPlan): LogicalPlan = plan match {
782+
case Sort(_, _, child) => recursiveRemoveSort(child)
783+
case other if canEliminateSort(other) =>
784+
other.withNewChildren(other.children.map(recursiveRemoveSort))
785+
case _ => plan
786+
}
787+
788+
def canEliminateSort(plan: LogicalPlan): Boolean = plan match {
789+
case p: Project => p.projectList.forall(_.deterministic)
790+
case f: Filter => f.condition.deterministic
791+
case _: ResolvedHint => true
792+
case _ => false
776793
}
777794
}
778795

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantSortsSuite.scala

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,16 +17,12 @@
1717

1818
package org.apache.spark.sql.catalyst.optimizer
1919

20-
import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry}
21-
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
2220
import org.apache.spark.sql.catalyst.dsl.expressions._
2321
import org.apache.spark.sql.catalyst.dsl.plans._
2422
import org.apache.spark.sql.catalyst.expressions._
2523
import org.apache.spark.sql.catalyst.plans._
2624
import org.apache.spark.sql.catalyst.plans.logical._
2725
import org.apache.spark.sql.catalyst.rules._
28-
import org.apache.spark.sql.internal.SQLConf
29-
import org.apache.spark.sql.internal.SQLConf.{CASE_SENSITIVE, ORDER_BY_ORDINAL}
3026

3127
class RemoveRedundantSortsSuite extends PlanTest {
3228

@@ -42,15 +38,15 @@ class RemoveRedundantSortsSuite extends PlanTest {
4238

4339
test("remove redundant order by") {
4440
val orderedPlan = testRelation.select('a, 'b).orderBy('a.asc, 'b.desc_nullsFirst)
45-
val unnecessaryReordered = orderedPlan.select('a).orderBy('a.asc, 'b.desc_nullsFirst)
41+
val unnecessaryReordered = orderedPlan.limit(2).select('a).orderBy('a.asc, 'b.desc_nullsFirst)
4642
val optimized = Optimize.execute(unnecessaryReordered.analyze)
47-
val correctAnswer = orderedPlan.select('a).analyze
43+
val correctAnswer = orderedPlan.limit(2).select('a).analyze
4844
comparePlans(Optimize.execute(optimized), correctAnswer)
4945
}
5046

5147
test("do not remove sort if the order is different") {
5248
val orderedPlan = testRelation.select('a, 'b).orderBy('a.asc, 'b.desc_nullsFirst)
53-
val reorderedDifferently = orderedPlan.select('a).orderBy('a.asc, 'b.desc)
49+
val reorderedDifferently = orderedPlan.limit(2).select('a).orderBy('a.asc, 'b.desc)
5450
val optimized = Optimize.execute(reorderedDifferently.analyze)
5551
val correctAnswer = reorderedDifferently.analyze
5652
comparePlans(optimized, correctAnswer)
@@ -72,6 +68,14 @@ class RemoveRedundantSortsSuite extends PlanTest {
7268
comparePlans(optimized, correctAnswer)
7369
}
7470

71+
test("different sorts are not simplified if limit is in between") {
72+
val orderedPlan = testRelation.select('a, 'b).orderBy('b.desc).limit(Literal(10))
73+
.orderBy('a.asc)
74+
val optimized = Optimize.execute(orderedPlan.analyze)
75+
val correctAnswer = orderedPlan.analyze
76+
comparePlans(optimized, correctAnswer)
77+
}
78+
7579
test("range is already sorted") {
7680
val inputPlan = Range(1L, 1000L, 1, 10)
7781
val orderedPlan = inputPlan.orderBy('id.asc)
@@ -98,4 +102,37 @@ class RemoveRedundantSortsSuite extends PlanTest {
98102
val correctAnswer = groupedAndResorted.analyze
99103
comparePlans(optimized, correctAnswer)
100104
}
105+
106+
test("remove two consecutive sorts") {
107+
val orderedTwice = testRelation.orderBy('a.asc).orderBy('b.desc)
108+
val optimized = Optimize.execute(orderedTwice.analyze)
109+
val correctAnswer = testRelation.orderBy('b.desc).analyze
110+
comparePlans(optimized, correctAnswer)
111+
}
112+
113+
test("remove sorts separated by Filter/Project operators") {
114+
val orderedTwiceWithProject = testRelation.orderBy('a.asc).select('b).orderBy('b.desc)
115+
val optimizedWithProject = Optimize.execute(orderedTwiceWithProject.analyze)
116+
val correctAnswerWithProject = testRelation.select('b).orderBy('b.desc).analyze
117+
comparePlans(optimizedWithProject, correctAnswerWithProject)
118+
119+
val orderedTwiceWithFilter =
120+
testRelation.orderBy('a.asc).where('b > Literal(0)).orderBy('b.desc)
121+
val optimizedWithFilter = Optimize.execute(orderedTwiceWithFilter.analyze)
122+
val correctAnswerWithFilter = testRelation.where('b > Literal(0)).orderBy('b.desc).analyze
123+
comparePlans(optimizedWithFilter, correctAnswerWithFilter)
124+
125+
val orderedTwiceWithBoth =
126+
testRelation.orderBy('a.asc).select('b).where('b > Literal(0)).orderBy('b.desc)
127+
val optimizedWithBoth = Optimize.execute(orderedTwiceWithBoth.analyze)
128+
val correctAnswerWithBoth =
129+
testRelation.select('b).where('b > Literal(0)).orderBy('b.desc).analyze
130+
comparePlans(optimizedWithBoth, correctAnswerWithBoth)
131+
132+
val orderedThrice = orderedTwiceWithBoth.select(('b + 1).as('c)).orderBy('c.asc)
133+
val optimizedThrice = Optimize.execute(orderedThrice.analyze)
134+
val correctAnswerThrice = testRelation.select('b).where('b > Literal(0))
135+
.select(('b + 1).as('c)).orderBy('c.asc).analyze
136+
comparePlans(optimizedThrice, correctAnswerThrice)
137+
}
101138
}

0 commit comments

Comments
 (0)