@@ -81,55 +81,56 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
81
81
*/
82
82
object SpecialLimits extends Strategy {
83
83
override def apply (plan : LogicalPlan ): Seq [SparkPlan ] = plan match {
84
- case ReturnAnswer (rootPlan) => rootPlan match {
85
- case Limit (IntegerLiteral (limit), Sort (order, true , child))
86
- if limit < conf.topKSortFallbackThreshold =>
87
- TakeOrderedAndProjectExec (limit, order, child.output, planLater(child)) :: Nil
88
- case Limit (IntegerLiteral (limit), Project (projectList, Sort (order, true , child)))
89
- if limit < conf.topKSortFallbackThreshold =>
90
- TakeOrderedAndProjectExec (limit, order, projectList, planLater(child)) :: Nil
84
+ // Call `planTakeOrdered` first which matches a larger plan.
85
+ case ReturnAnswer (rootPlan) => planTakeOrdered(rootPlan).getOrElse(rootPlan match {
86
+ // We should match the combination of limit and offset first, to get the optimal physical
87
+ // plan, instead of planning limit and offset separately.
88
+ case LimitAndOffset (limit, offset, child) =>
89
+ CollectLimitExec (limit = limit, child = planLater(child), offset = offset)
90
+ case OffsetAndLimit (offset, limit, child) =>
91
+ // 'Offset a' then 'Limit b' is the same as 'Limit a + b' then 'Offset a'.
92
+ CollectLimitExec (limit = offset + limit, child = planLater(child), offset = offset)
91
93
case Limit (IntegerLiteral (limit), child) =>
92
- CollectLimitExec (limit, planLater(child)) :: Nil
93
- case LimitAndOffset (IntegerLiteral (limit), IntegerLiteral (offset),
94
- Sort (order, true , child)) if limit + offset < conf.topKSortFallbackThreshold =>
95
- TakeOrderedAndProjectExec (
96
- limit, order, child.output, planLater(child), offset) :: Nil
97
- case LimitAndOffset (IntegerLiteral (limit), IntegerLiteral (offset),
98
- Project (projectList, Sort (order, true , child)))
99
- if limit + offset < conf.topKSortFallbackThreshold =>
100
- TakeOrderedAndProjectExec (
101
- limit, order, projectList, planLater(child), offset) :: Nil
102
- case LimitAndOffset (IntegerLiteral (limit), IntegerLiteral (offset), child) =>
103
- CollectLimitExec (limit, planLater(child), offset) :: Nil
94
+ CollectLimitExec (limit = limit, child = planLater(child))
104
95
case logical.Offset (IntegerLiteral (offset), child) =>
105
- CollectLimitExec (child = planLater(child), offset = offset) :: Nil
96
+ CollectLimitExec (child = planLater(child), offset = offset)
106
97
case Tail (IntegerLiteral (limit), child) =>
107
- CollectTailExec (limit, planLater(child)) :: Nil
108
- case other => planLater(other) :: Nil
109
- }
98
+ CollectTailExec (limit, planLater(child))
99
+ case other => planLater(other)
100
+ }) :: Nil
101
+
102
+ case other => planTakeOrdered(other).toSeq
103
+ }
104
+
105
+ private def planTakeOrdered (plan : LogicalPlan ): Option [SparkPlan ] = plan match {
106
+ // We should match the combination of limit and offset first, to get the optimal physical
107
+ // plan, instead of planning limit and offset separately.
108
+ case LimitAndOffset (limit, offset, Sort (order, true , child))
109
+ if limit < conf.topKSortFallbackThreshold =>
110
+ Some (TakeOrderedAndProjectExec (
111
+ limit, order, child.output, planLater(child), offset))
112
+ case LimitAndOffset (limit, offset, Project (projectList, Sort (order, true , child)))
113
+ if limit < conf.topKSortFallbackThreshold =>
114
+ Some (TakeOrderedAndProjectExec (
115
+ limit, order, projectList, planLater(child), offset))
116
+ // 'Offset a' then 'Limit b' is the same as 'Limit a + b' then 'Offset a'.
117
+ case OffsetAndLimit (offset, limit, Sort (order, true , child))
118
+ if offset + limit < conf.topKSortFallbackThreshold =>
119
+ Some (TakeOrderedAndProjectExec (
120
+ offset + limit, order, child.output, planLater(child), offset))
121
+ case OffsetAndLimit (offset, limit, Project (projectList, Sort (order, true , child)))
122
+ if offset + limit < conf.topKSortFallbackThreshold =>
123
+ Some (TakeOrderedAndProjectExec (
124
+ offset + limit, order, projectList, planLater(child), offset))
110
125
case Limit (IntegerLiteral (limit), Sort (order, true , child))
111
126
if limit < conf.topKSortFallbackThreshold =>
112
- TakeOrderedAndProjectExec (limit, order, child.output, planLater(child)) :: Nil
127
+ Some (TakeOrderedAndProjectExec (
128
+ limit, order, child.output, planLater(child)))
113
129
case Limit (IntegerLiteral (limit), Project (projectList, Sort (order, true , child)))
114
130
if limit < conf.topKSortFallbackThreshold =>
115
- TakeOrderedAndProjectExec (limit, order, projectList, planLater(child)) :: Nil
116
- // This is a global LIMIT and OFFSET over a logical sorting operator,
117
- // where the sum of specified limit and specified offset is less than a heuristic threshold.
118
- // In this case we generate a physical top-K sorting operator, passing down
119
- // the limit and offset values to be evaluated inline during the physical
120
- // sorting operation for greater efficiency.
121
- case LimitAndOffset (IntegerLiteral (limit), IntegerLiteral (offset),
122
- Sort (order, true , child)) if limit + offset < conf.topKSortFallbackThreshold =>
123
- TakeOrderedAndProjectExec (
124
- limit, order, child.output, planLater(child), offset) :: Nil
125
- case LimitAndOffset (IntegerLiteral (limit), IntegerLiteral (offset),
126
- Project (projectList, Sort (order, true , child)))
127
- if limit + offset < conf.topKSortFallbackThreshold =>
128
- TakeOrderedAndProjectExec (limit, order, projectList, planLater(child), offset) :: Nil
129
- case LimitAndOffset (IntegerLiteral (limit), IntegerLiteral (offset), child) =>
130
- GlobalLimitAndOffsetExec (limit, offset, planLater(child)) :: Nil
131
- case _ =>
132
- Nil
131
+ Some (TakeOrderedAndProjectExec (
132
+ limit, order, projectList, planLater(child)))
133
+ case _ => None
133
134
}
134
135
}
135
136
@@ -814,12 +815,19 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
814
815
case logical.LocalRelation (output, data, _) =>
815
816
LocalTableScanExec (output, data) :: Nil
816
817
case CommandResult (output, _, plan, data) => CommandResultExec (output, plan, data) :: Nil
818
+ // We should match the combination of limit and offset first, to get the optimal physical
819
+ // plan, instead of planning limit and offset separately.
820
+ case LimitAndOffset (limit, offset, child) =>
821
+ GlobalLimitExec (limit, planLater(child), offset) :: Nil
822
+ case OffsetAndLimit (offset, limit, child) =>
823
+ // 'Offset a' then 'Limit b' is the same as 'Limit a + b' then 'Offset a'.
824
+ GlobalLimitExec (limit = offset + limit, child = planLater(child), offset = offset) :: Nil
817
825
case logical.LocalLimit (IntegerLiteral (limit), child) =>
818
826
execution.LocalLimitExec (limit, planLater(child)) :: Nil
819
827
case logical.GlobalLimit (IntegerLiteral (limit), child) =>
820
828
execution.GlobalLimitExec (limit, planLater(child)) :: Nil
821
829
case logical.Offset (IntegerLiteral (offset), child) =>
822
- GlobalLimitAndOffsetExec (offset = offset, child = planLater(child)) :: Nil
830
+ GlobalLimitExec ( child = planLater(child), offset = offset ) :: Nil
823
831
case union : logical.Union =>
824
832
execution.UnionExec (union.children.map(planLater)) :: Nil
825
833
case g @ logical.Generate (generator, _, outer, _, _, child) =>
0 commit comments