Skip to content

Commit

Permalink
Optimize code
Browse files Browse the repository at this point in the history
  • Loading branch information
beliefer committed Nov 12, 2020
1 parent fd7e02e commit 72ceacc
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 127 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -808,8 +808,7 @@ object CollapseRepartition extends Rule[LogicalPlan] {
}

/**
* Substitute the aggregate expression which uses [[First]] as the aggregate function
* in the window with the window function [[NthValue]].
* Replaces first(col) with nth_value(col, 1) for better performance.
*/
object OptimizeWindowFunctions extends Rule[LogicalPlan] {
def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,21 +31,35 @@ class OptimizeWindowFunctionsSuite extends PlanTest {
OptimizeWindowFunctions) :: Nil
}

test("check OptimizeWindowFunctions") {
val testRelation = LocalRelation('a.double, 'b.double, 'c.string)
val a = testRelation.output.head
val testRelation = LocalRelation('a.double, 'b.double, 'c.string)
val a = testRelation.output(0)
val b = testRelation.output(1)
val c = testRelation.output(2)

test("replace first(col) by nth_value(col, 1) if the window frame is ordered") {
val inputPlan = testRelation.select(
WindowExpression(
First(a, false).toAggregateExpression(),
WindowSpecDefinition(Nil, a.asc :: Nil,
WindowSpecDefinition(b :: Nil, c.asc :: Nil,
SpecifiedWindowFrame(RowFrame, UnboundedPreceding, CurrentRow))))
val correctAnswer = testRelation.select(
WindowExpression(
NthValue(a, Literal(1), false),
WindowSpecDefinition(Nil, a.asc :: Nil,
WindowSpecDefinition(b :: Nil, c.asc :: Nil,
SpecifiedWindowFrame(RowFrame, UnboundedPreceding, CurrentRow))))

val optimized = Optimize.execute(inputPlan)
assert(optimized == correctAnswer)
}

// Negative case: the window spec below passes Nil where the ordered test passes its
// sort order, so the optimizer is expected to leave first(col) as-is.
test("can't replace first(col) by nth_value(col, 1) if the window frame isn't ordered") {
val inputPlan = testRelation.select(
WindowExpression(
First(a, false).toAggregateExpression(),
WindowSpecDefinition(b :: Nil, Nil,
SpecifiedWindowFrame(RowFrame, UnboundedPreceding, CurrentRow))))

val optimized = Optimize.execute(inputPlan)
// The plan must come back unchanged, i.e. no NthValue substitution took place.
assert(optimized == inputPlan)
}
}
94 changes: 34 additions & 60 deletions sql/core/src/test/resources/sql-tests/inputs/window.sql
Original file line number Diff line number Diff line change
Expand Up @@ -150,130 +150,104 @@ FROM testData ORDER BY cate, val;
SELECT
employee_name,
salary,
first_value(employee_name) OVER (ORDER BY salary DESC) highest_salary,
nth_value(employee_name, 2) OVER (ORDER BY salary DESC) second_highest_salary
first_value(employee_name) OVER w highest_salary,
nth_value(employee_name, 2) OVER w second_highest_salary
FROM
basic_pays
WINDOW w AS (ORDER BY salary DESC)
ORDER BY salary DESC;

SELECT
employee_name,
salary,
first_value(employee_name) OVER (
ORDER BY salary DESC
RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) highest_salary,
nth_value(employee_name, 2) OVER (
ORDER BY salary DESC
RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) second_highest_salary
first_value(employee_name) OVER w highest_salary,
nth_value(employee_name, 2) OVER w second_highest_salary
FROM
basic_pays
WINDOW w AS (ORDER BY salary DESC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
ORDER BY salary DESC;

SELECT
employee_name,
salary,
first_value(employee_name) OVER (
ORDER BY salary DESC
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) highest_salary,
nth_value(employee_name, 2) OVER (
ORDER BY salary DESC
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) second_highest_salary
first_value(employee_name) OVER w highest_salary,
nth_value(employee_name, 2) OVER w second_highest_salary
FROM
basic_pays
WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
ORDER BY salary DESC;

SELECT
employee_name,
salary,
first_value(employee_name) OVER (
ORDER BY salary
RANGE BETWEEN 2000 PRECEDING AND 1000 FOLLOWING) highest_salary,
nth_value(employee_name, 2) OVER (
ORDER BY salary
RANGE BETWEEN 2000 PRECEDING AND 1000 FOLLOWING) second_highest_salary
first_value(employee_name) OVER w highest_salary,
nth_value(employee_name, 2) OVER w second_highest_salary
FROM
basic_pays
WINDOW w AS (ORDER BY salary RANGE BETWEEN 2000 PRECEDING AND 1000 FOLLOWING)
ORDER BY salary;

SELECT
employee_name,
salary,
first_value(employee_name) OVER (
ORDER BY salary DESC
ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) highest_salary,
nth_value(employee_name, 2) OVER (
ORDER BY salary DESC
ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) second_highest_salary
first_value(employee_name) OVER w highest_salary,
nth_value(employee_name, 2) OVER w second_highest_salary
FROM
basic_pays
WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING)
ORDER BY salary DESC;

SELECT
employee_name,
salary,
first_value(employee_name) OVER (
ORDER BY salary DESC
RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) highest_salary,
nth_value(employee_name, 2) OVER (
ORDER BY salary DESC
RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) second_highest_salary
first_value(employee_name) OVER w highest_salary,
nth_value(employee_name, 2) OVER w second_highest_salary
FROM
basic_pays
WINDOW w AS (ORDER BY salary DESC RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
ORDER BY salary DESC;

SELECT
employee_name,
salary,
first_value(employee_name) OVER (
ORDER BY salary DESC
RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) highest_salary,
nth_value(employee_name, 2) OVER (
ORDER BY salary DESC
RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) second_highest_salary
first_value(employee_name) OVER w highest_salary,
nth_value(employee_name, 2) OVER w second_highest_salary
FROM
basic_pays
WINDOW w AS (ORDER BY salary DESC RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
ORDER BY salary DESC;

SELECT
employee_name,
salary,
first_value(employee_name) OVER (
ORDER BY salary DESC
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) highest_salary,
nth_value(employee_name, 2) OVER (
ORDER BY salary DESC
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) second_highest_salary
first_value(employee_name) OVER w highest_salary,
nth_value(employee_name, 2) OVER w second_highest_salary
FROM
basic_pays
WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
ORDER BY salary DESC;

SELECT
employee_name,
salary,
first_value(employee_name) OVER (
ORDER BY salary DESC
ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) highest_salary,
nth_value(employee_name, 2) OVER (
ORDER BY salary DESC
ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) second_highest_salary
first_value(employee_name) OVER w highest_salary,
nth_value(employee_name, 2) OVER w second_highest_salary
FROM
basic_pays
WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING)
ORDER BY salary DESC;

SELECT
employee_name,
department,
salary,
FIRST_VALUE(employee_name) OVER (
PARTITION BY department
ORDER BY salary DESC
RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) highest_salary,
NTH_VALUE(employee_name, 2) OVER (
PARTITION BY department
ORDER BY salary DESC
RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) second_highest_salary
FIRST_VALUE(employee_name) OVER w highest_salary,
NTH_VALUE(employee_name, 2) OVER w second_highest_salary
FROM
basic_pays
WINDOW w AS (
PARTITION BY department
ORDER BY salary DESC
RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
)
ORDER BY department;
Loading

0 comments on commit 72ceacc

Please sign in to comment.