Skip to content

Commit

Permalink
[fix](Nereids): MergeSetOperations can merge SetOperation ALL. (apach…
Browse files Browse the repository at this point in the history
  • Loading branch information
jackwener authored Jun 18, 2023
1 parent 5ae1454 commit ac32900
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 98 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -218,21 +218,28 @@ public class Rewriter extends AbstractBatchJobExecutor {
// this rule should invoke after ColumnPruning
custom(RuleType.ELIMINATE_UNNECESSARY_PROJECT, EliminateUnnecessaryProject::new),

// we need to execute this rule at the end of rewrite
// to avoid two consecutive identical projects appearing when we do optimization.
topic("Others optimization",
bottomUp(ImmutableList.<RuleFactory>builder().addAll(ImmutableList.of(
new EliminateNotNull(),
new EliminateLimit(),
new EliminateFilter(),
new EliminateAggregate(),
new MergeSetOperations(),
new PushdownLimit(),
new BuildAggForUnion()
// after eliminate filter, the project maybe can push down again,
// so we add push down rules
)).addAll(RuleSet.PUSH_DOWN_FILTERS).build())
bottomUp(ImmutableList.<RuleFactory>builder()
.addAll(ImmutableList.of(
new EliminateNotNull(),
new EliminateLimit(),
new EliminateFilter(),
new EliminateAggregate(),
new PushdownLimit()
))
// after eliminating some plans, we may be able to push down some plans again, so add the push-down rules
.add(new PushdownLimit())
.addAll(RuleSet.PUSH_DOWN_FILTERS)
.build()
)
),

topic("Intersection optimization",
// Do MergeSetOperations first because we hope to match the pattern of a Distinct SetOperator.
bottomUp(new MergeSetOperations()),
bottomUp(new BuildAggForUnion())
),

topic("Window optimization",
topDown(
new PushdownLimit(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,12 @@
import java.util.Optional;

/**
* For distinct union, add agg node.
* Convert Union into Agg + UnionAll.
* <pre>
* Agg
* Union -> |
* UnionAll
* </pre>
*/
public class BuildAggForUnion extends OneRewriteRuleFactory {
@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ public class MergeSetOperations implements RewriteRuleFactory {
@Override
public List<Rule> buildRules() {
return ImmutableList.of(
RuleType.MERGE_SET_OPERATION.build(
logicalSetOperation(any(), any()).when(MergeSetOperations::canMerge).then(parentSetOperation -> {
List<Plan> newChildren = parentSetOperation.children()
.stream()
Expand All @@ -61,8 +60,7 @@ public List<Rule> buildRules() {
}).collect(ImmutableList.toImmutableList());

return parentSetOperation.withChildren(newChildren);
})
)
}).toRule(RuleType.MERGE_SET_OPERATION)
);
}

Expand All @@ -80,7 +78,7 @@ public static boolean canMerge(LogicalSetOperation parent, Plan child) {
}

public static boolean isSameQualifierOrChildQualifierIsAll(LogicalSetOperation parentSetOperation,
LogicalSetOperation childSetOperation) {
LogicalSetOperation childSetOperation) {
return parentSetOperation.getQualifier() == childSetOperation.getQualifier()
|| childSetOperation.getQualifier() == Qualifier.ALL;
}
Expand Down
84 changes: 40 additions & 44 deletions regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query49.out
Original file line number Diff line number Diff line change
Expand Up @@ -7,64 +7,60 @@ PhysicalTopN
--------PhysicalDistribute
----------hashAgg[LOCAL]
------------PhysicalUnion
--------------hashAgg[GLOBAL]
----------------PhysicalDistribute
------------------hashAgg[LOCAL]
--------------------PhysicalUnion
----------------------PhysicalProject
------------------------filter(((return_rank <= 10) OR (currency_rank <= 10)))
--------------PhysicalProject
----------------filter(((return_rank <= 10) OR (currency_rank <= 10)))
------------------PhysicalWindow
--------------------PhysicalQuickSort
----------------------PhysicalDistribute
------------------------PhysicalQuickSort
--------------------------PhysicalWindow
----------------------------PhysicalQuickSort
------------------------------PhysicalDistribute
--------------------------------PhysicalQuickSort
----------------------------------PhysicalWindow
------------------------------------PhysicalQuickSort
----------------------------------PhysicalProject
------------------------------------hashAgg[GLOBAL]
--------------------------------------PhysicalDistribute
----------------------------------------PhysicalQuickSort
----------------------------------------hashAgg[LOCAL]
------------------------------------------PhysicalProject
--------------------------------------------hashAgg[GLOBAL]
----------------------------------------------PhysicalDistribute
------------------------------------------------hashAgg[LOCAL]
--------------------------------------------hashJoin[INNER_JOIN](ws.ws_order_number = wr.wr_order_number)(item = wr.wr_item_sk)
----------------------------------------------PhysicalProject
------------------------------------------------filter((wr.wr_return_amt > 10000.00))
--------------------------------------------------PhysicalOlapScan[web_returns]
----------------------------------------------hashJoin[INNER_JOIN](ws.ws_sold_date_sk = date_dim.d_date_sk)
------------------------------------------------PhysicalProject
--------------------------------------------------filter((ws.ws_net_paid > 0.00)(ws.ws_quantity > 0)(ws.ws_net_profit > 1.00))
----------------------------------------------------PhysicalOlapScan[web_sales]
------------------------------------------------PhysicalDistribute
--------------------------------------------------PhysicalProject
----------------------------------------------------hashJoin[INNER_JOIN](ws.ws_order_number = wr.wr_order_number)(item = wr.wr_item_sk)
------------------------------------------------------PhysicalProject
--------------------------------------------------------filter((wr.wr_return_amt > 10000.00))
----------------------------------------------------------PhysicalOlapScan[web_returns]
------------------------------------------------------hashJoin[INNER_JOIN](ws.ws_sold_date_sk = date_dim.d_date_sk)
--------------------------------------------------------PhysicalProject
----------------------------------------------------------filter((ws.ws_net_paid > 0.00)(ws.ws_quantity > 0)(ws.ws_net_profit > 1.00))
------------------------------------------------------------PhysicalOlapScan[web_sales]
--------------------------------------------------------PhysicalDistribute
----------------------------------------------------------PhysicalProject
------------------------------------------------------------filter((date_dim.d_moy = 12)(date_dim.d_year = 1999))
--------------------------------------------------------------PhysicalOlapScan[date_dim]
----------------------PhysicalProject
------------------------filter(((return_rank <= 10) OR (currency_rank <= 10)))
----------------------------------------------------filter((date_dim.d_moy = 12)(date_dim.d_year = 1999))
------------------------------------------------------PhysicalOlapScan[date_dim]
--------------PhysicalProject
----------------filter(((return_rank <= 10) OR (currency_rank <= 10)))
------------------PhysicalWindow
--------------------PhysicalQuickSort
----------------------PhysicalDistribute
------------------------PhysicalQuickSort
--------------------------PhysicalWindow
----------------------------PhysicalQuickSort
------------------------------PhysicalDistribute
--------------------------------PhysicalQuickSort
----------------------------------PhysicalWindow
------------------------------------PhysicalQuickSort
----------------------------------PhysicalProject
------------------------------------hashAgg[GLOBAL]
--------------------------------------PhysicalDistribute
----------------------------------------PhysicalQuickSort
----------------------------------------hashAgg[LOCAL]
------------------------------------------PhysicalProject
--------------------------------------------hashAgg[GLOBAL]
----------------------------------------------PhysicalDistribute
------------------------------------------------hashAgg[LOCAL]
--------------------------------------------hashJoin[INNER_JOIN](cs.cs_order_number = cr.cr_order_number)(item = cr.cr_item_sk)
----------------------------------------------PhysicalProject
------------------------------------------------filter((cr.cr_return_amount > 10000.00))
--------------------------------------------------PhysicalOlapScan[catalog_returns]
----------------------------------------------hashJoin[INNER_JOIN](cs.cs_sold_date_sk = date_dim.d_date_sk)
------------------------------------------------PhysicalProject
--------------------------------------------------filter((cs.cs_net_paid > 0.00)(cs.cs_quantity > 0)(cs.cs_net_profit > 1.00))
----------------------------------------------------PhysicalOlapScan[catalog_sales]
------------------------------------------------PhysicalDistribute
--------------------------------------------------PhysicalProject
----------------------------------------------------hashJoin[INNER_JOIN](cs.cs_order_number = cr.cr_order_number)(item = cr.cr_item_sk)
------------------------------------------------------PhysicalProject
--------------------------------------------------------filter((cr.cr_return_amount > 10000.00))
----------------------------------------------------------PhysicalOlapScan[catalog_returns]
------------------------------------------------------hashJoin[INNER_JOIN](cs.cs_sold_date_sk = date_dim.d_date_sk)
--------------------------------------------------------PhysicalProject
----------------------------------------------------------filter((cs.cs_net_paid > 0.00)(cs.cs_quantity > 0)(cs.cs_net_profit > 1.00))
------------------------------------------------------------PhysicalOlapScan[catalog_sales]
--------------------------------------------------------PhysicalDistribute
----------------------------------------------------------PhysicalProject
------------------------------------------------------------filter((date_dim.d_moy = 12)(date_dim.d_year = 1999))
--------------------------------------------------------------PhysicalOlapScan[date_dim]
----------------------------------------------------filter((date_dim.d_moy = 12)(date_dim.d_year = 1999))
------------------------------------------------------PhysicalOlapScan[date_dim]
--------------PhysicalProject
----------------filter(((return_rank <= 10) OR (currency_rank <= 10)))
------------------PhysicalWindow
Expand Down
68 changes: 32 additions & 36 deletions regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query75.out
Original file line number Diff line number Diff line change
Expand Up @@ -7,44 +7,40 @@ CteAnchor[cteId= ( CTEId#3=] )
--------PhysicalDistribute
----------hashAgg[LOCAL]
------------PhysicalUnion
--------------hashAgg[GLOBAL]
----------------PhysicalDistribute
------------------hashAgg[LOCAL]
--------------------PhysicalUnion
----------------------PhysicalProject
------------------------hashJoin[RIGHT_OUTER_JOIN](catalog_sales.cs_item_sk = catalog_returns.cr_item_sk)(catalog_sales.cs_order_number = catalog_returns.cr_order_number)
--------------------------PhysicalProject
----------------------------PhysicalOlapScan[catalog_returns]
--------------------------PhysicalProject
----------------------------hashJoin[INNER_JOIN](date_dim.d_date_sk = catalog_sales.cs_sold_date_sk)
------------------------------hashJoin[INNER_JOIN](item.i_item_sk = catalog_sales.cs_item_sk)
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[catalog_sales]
--------------------------------PhysicalDistribute
----------------------------------PhysicalProject
------------------------------------filter((cast(i_category as VARCHAR(*)) = 'Home'))
--------------------------------------PhysicalOlapScan[item]
------------------------------PhysicalDistribute
--------------------------------PhysicalProject
----------------------------------filter(((date_dim.d_year = 1998) OR (date_dim.d_year = 1999)))
------------------------------------PhysicalOlapScan[date_dim]
----------------------PhysicalProject
------------------------hashJoin[RIGHT_OUTER_JOIN](store_sales.ss_item_sk = store_returns.sr_item_sk)(store_sales.ss_ticket_number = store_returns.sr_ticket_number)
--------------PhysicalProject
----------------hashJoin[RIGHT_OUTER_JOIN](catalog_sales.cs_item_sk = catalog_returns.cr_item_sk)(catalog_sales.cs_order_number = catalog_returns.cr_order_number)
------------------PhysicalProject
--------------------PhysicalOlapScan[catalog_returns]
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN](date_dim.d_date_sk = catalog_sales.cs_sold_date_sk)
----------------------hashJoin[INNER_JOIN](item.i_item_sk = catalog_sales.cs_item_sk)
------------------------PhysicalProject
--------------------------PhysicalOlapScan[catalog_sales]
------------------------PhysicalDistribute
--------------------------PhysicalProject
----------------------------PhysicalOlapScan[store_returns]
----------------------------filter((cast(i_category as VARCHAR(*)) = 'Home'))
------------------------------PhysicalOlapScan[item]
----------------------PhysicalDistribute
------------------------PhysicalProject
--------------------------filter(((date_dim.d_year = 1998) OR (date_dim.d_year = 1999)))
----------------------------PhysicalOlapScan[date_dim]
--------------PhysicalProject
----------------hashJoin[RIGHT_OUTER_JOIN](store_sales.ss_item_sk = store_returns.sr_item_sk)(store_sales.ss_ticket_number = store_returns.sr_ticket_number)
------------------PhysicalProject
--------------------PhysicalOlapScan[store_returns]
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN](date_dim.d_date_sk = store_sales.ss_sold_date_sk)
----------------------hashJoin[INNER_JOIN](item.i_item_sk = store_sales.ss_item_sk)
------------------------PhysicalProject
--------------------------PhysicalOlapScan[store_sales]
------------------------PhysicalDistribute
--------------------------PhysicalProject
----------------------------hashJoin[INNER_JOIN](date_dim.d_date_sk = store_sales.ss_sold_date_sk)
------------------------------hashJoin[INNER_JOIN](item.i_item_sk = store_sales.ss_item_sk)
--------------------------------PhysicalProject
----------------------------------PhysicalOlapScan[store_sales]
--------------------------------PhysicalDistribute
----------------------------------PhysicalProject
------------------------------------filter((cast(i_category as VARCHAR(*)) = 'Home'))
--------------------------------------PhysicalOlapScan[item]
------------------------------PhysicalDistribute
--------------------------------PhysicalProject
----------------------------------filter(((date_dim.d_year = 1998) OR (date_dim.d_year = 1999)))
------------------------------------PhysicalOlapScan[date_dim]
----------------------------filter((cast(i_category as VARCHAR(*)) = 'Home'))
------------------------------PhysicalOlapScan[item]
----------------------PhysicalDistribute
------------------------PhysicalProject
--------------------------filter(((date_dim.d_year = 1998) OR (date_dim.d_year = 1999)))
----------------------------PhysicalOlapScan[date_dim]
--------------PhysicalProject
----------------hashJoin[RIGHT_OUTER_JOIN](web_sales.ws_item_sk = web_returns.wr_item_sk)(web_sales.ws_order_number = web_returns.wr_order_number)
------------------PhysicalProject
Expand Down

0 comments on commit ac32900

Please sign in to comment.