diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java index 4c6a8fdb8ade88..2852db11089a25 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/SubqueryToApply.java @@ -23,7 +23,6 @@ import org.apache.doris.nereids.rules.RuleType; import org.apache.doris.nereids.trees.expressions.Alias; import org.apache.doris.nereids.trees.expressions.BinaryOperator; -import org.apache.doris.nereids.trees.expressions.ComparisonPredicate; import org.apache.doris.nereids.trees.expressions.Exists; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.InSubquery; @@ -45,6 +44,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalOneRowRelation; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; +import org.apache.doris.nereids.trees.plans.logical.LogicalSort; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; @@ -180,7 +180,13 @@ public List buildRules() { Collectors.toList())); List subqueryConjuncts = joinConjuncts.get(true); if (subqueryConjuncts == null || subqueryConjuncts.stream() - .anyMatch(expr -> !isValidSubqueryConjunct(expr, join.left()))) { + .anyMatch(expr -> !isValidSubqueryConjunct(expr))) { + return join; + } + + List relatedInfoList = collectRelatedInfo( + subqueryConjuncts, join.left(), join.right()); + if (relatedInfoList.stream().anyMatch(info -> info == RelatedInfo.UnSupported)) { return join; } @@ -188,8 +194,9 @@ public List buildRules() { .>map(e -> e.collect(SubqueryExpr.class::isInstance)) .collect(ImmutableList.toImmutableList()); ImmutableList.Builder newConjuncts = new ImmutableList.Builder<>(); - LogicalPlan applyPlan = null; + LogicalPlan applyPlan; LogicalPlan leftChildPlan = (LogicalPlan) join.left(); + LogicalPlan rightChildPlan = (LogicalPlan) join.right(); // Subquery traversal with the conjunct of and as the granularity. for (int i = 0; i < subqueryExprsList.size(); ++i) { @@ -207,9 +214,14 @@ public List buildRules() { applyPlan = subqueryToApply( subqueryExprs.stream().collect(ImmutableList.toImmutableList()), - leftChildPlan, context.getSubqueryToMarkJoinSlot(), + relatedInfoList.get(i) == RelatedInfo.RelatedToLeft ? leftChildPlan : rightChildPlan, + context.getSubqueryToMarkJoinSlot(), ctx.cascadesContext, Optional.of(conjunct), false); - leftChildPlan = applyPlan; + if (relatedInfoList.get(i) == RelatedInfo.RelatedToLeft) { + leftChildPlan = applyPlan; + } else { + rightChildPlan = applyPlan; + } newConjuncts.add(conjunct); } List simpleConjuncts = joinConjuncts.get(false); @@ -217,34 +229,82 @@ public List buildRules() { newConjuncts.addAll(simpleConjuncts); } Plan newJoin = join.withConjunctsChildren(join.getHashJoinConjuncts(), - newConjuncts.build(), applyPlan, join.right()); + newConjuncts.build(), leftChildPlan, rightChildPlan); return newJoin; })) ); } - private static boolean isValidSubqueryConjunct(Expression expression, Plan leftChild) { - // the subquery must be uncorrelated subquery or only correlated to the left child - // currently only support the following 4 simple scenarios - // 1. col ComparisonPredicate subquery - // 2. col in (subquery) - // 3. exists (subquery) - // 4. col1 ComparisonPredicate subquery or xxx (no more subquery) - List slots = leftChild.getOutput(); - if (expression instanceof ComparisonPredicate && expression.child(1) instanceof ScalarSubquery) { - ScalarSubquery subquery = (ScalarSubquery) expression.child(1); - return slots.containsAll(subquery.getCorrelateSlots()); - } else if (expression instanceof InSubquery) { - return slots.containsAll(((InSubquery) expression).getCorrelateSlots()); - } else if (expression instanceof Exists) { - return slots.containsAll(((Exists) expression).getCorrelateSlots()); - } else { + private static boolean isValidSubqueryConjunct(Expression expression) { + // only support 1 subquery expr in the expression + // don't support expression like subquery1 or subquery2 + return expression.collectToList(SubqueryExpr.class::isInstance).size() == 1; + } + + private enum RelatedInfo { + // both subquery and its output don't related to any child. like (select sum(t.a) from t) > 1 + Unrelated, + // either subquery or its output only related to left child. like bellow: + // tableLeft.a in (select t.a from t) + // 3 in (select t.b from t where t.a = tableLeft.a) + // tableLeft.a > (select sum(t.a) from t where tableLeft.b = t.b) + RelatedToLeft, + // like above, but related to right child + RelatedToRight, + // subquery related to both left and child is not supported: + // tableLeft.a > (select sum(t.a) from t where t.b = tableRight.b) + UnSupported + } + + private ImmutableList collectRelatedInfo(List subqueryConjuncts, + Plan leftChild, Plan rightChild) { + int size = subqueryConjuncts.size(); + ImmutableList.Builder correlatedInfoList = new ImmutableList.Builder<>(); + Set leftOutputSlots = leftChild.getOutputSet(); + Set rightOutputSlots = rightChild.getOutputSet(); + for (int i = 0; i < size; ++i) { + Expression expression = subqueryConjuncts.get(i); List subqueryExprs = expression.collectToList(SubqueryExpr.class::isInstance); + RelatedInfo relatedInfo = RelatedInfo.UnSupported; if (subqueryExprs.size() == 1) { - return slots.containsAll(subqueryExprs.get(0).getCorrelateSlots()); + SubqueryExpr subqueryExpr = subqueryExprs.get(0); + List correlatedSlots = subqueryExpr.getCorrelateSlots(); + if (subqueryExpr instanceof ScalarSubquery) { + Set inputSlots = expression.getInputSlots(); + if (correlatedSlots.isEmpty() && inputSlots.isEmpty()) { + relatedInfo = RelatedInfo.Unrelated; + } else if (leftOutputSlots.containsAll(inputSlots) + && leftOutputSlots.containsAll(correlatedSlots)) { + relatedInfo = RelatedInfo.RelatedToLeft; + } else if (rightOutputSlots.containsAll(inputSlots) + && rightOutputSlots.containsAll(correlatedSlots)) { + relatedInfo = RelatedInfo.RelatedToRight; + } + } else if (subqueryExpr instanceof InSubquery) { + InSubquery inSubquery = (InSubquery) subqueryExpr; + Set compareSlots = inSubquery.getCompareExpr().getInputSlots(); + if (compareSlots.isEmpty()) { + relatedInfo = RelatedInfo.UnSupported; + } else if (leftOutputSlots.containsAll(compareSlots) + && leftOutputSlots.containsAll(correlatedSlots)) { + relatedInfo = RelatedInfo.RelatedToLeft; + } else if (rightOutputSlots.containsAll(compareSlots) + && rightOutputSlots.containsAll(correlatedSlots)) { + relatedInfo = RelatedInfo.RelatedToRight; + } + } else if (subqueryExpr instanceof Exists) { + if (correlatedSlots.isEmpty()) { + relatedInfo = RelatedInfo.Unrelated; + } else if (leftOutputSlots.containsAll(correlatedSlots)) { + relatedInfo = RelatedInfo.RelatedToLeft; + } else if (rightOutputSlots.containsAll(correlatedSlots)) { + relatedInfo = RelatedInfo.RelatedToRight; + } + } } + correlatedInfoList.add(relatedInfo); } - return false; + return correlatedInfoList.build(); } private LogicalPlan subqueryToApply(List subqueryExprs, LogicalPlan childPlan, @@ -270,10 +330,17 @@ private LogicalPlan subqueryToApply(List subqueryExprs, LogicalPla private boolean nonMarkJoinExistsWithAgg(SubqueryExpr exists, Map> subqueryToMarkJoinSlot) { return exists instanceof Exists - && exists.getQueryPlan() - .anyMatch(planTreeNode -> planTreeNode instanceof LogicalAggregate - && ((LogicalAggregate) planTreeNode).getGroupByExpressions().isEmpty()) - && !subqueryToMarkJoinSlot.get(exists).isPresent(); + && !subqueryToMarkJoinSlot.get(exists).isPresent() + && hasTopLevelAggWithoutGroupBy(exists.getQueryPlan()); + } + + private boolean hasTopLevelAggWithoutGroupBy(Plan plan) { + if (plan instanceof LogicalAggregate) { + return ((LogicalAggregate) plan).getGroupByExpressions().isEmpty(); + } else if (plan instanceof LogicalProject || plan instanceof LogicalSort) { + return hasTopLevelAggWithoutGroupBy(plan.child(0)); + } + return false; } private LogicalPlan addApply(SubqueryExpr subquery, LogicalPlan childPlan, diff --git a/regression-test/data/nereids_syntax_p0/sub_query_correlated.out b/regression-test/data/nereids_syntax_p0/sub_query_correlated.out index c80519466733c1..0564d1884d9c05 100644 --- a/regression-test/data/nereids_syntax_p0/sub_query_correlated.out +++ b/regression-test/data/nereids_syntax_p0/sub_query_correlated.out @@ -486,3 +486,6 @@ true -- !cir_5218_exists_ok_6 -- 0 +-- !doris_7643 -- +3 3 + diff --git a/regression-test/suites/nereids_p0/subquery/test_subquery.groovy b/regression-test/suites/nereids_p0/subquery/test_subquery.groovy index 1c2dd55676d42c..01e347031f82ef 100644 --- a/regression-test/suites/nereids_p0/subquery/test_subquery.groovy +++ b/regression-test/suites/nereids_p0/subquery/test_subquery.groovy @@ -65,6 +65,62 @@ suite("test_subquery") { select * from nereids_test_query_db.baseall where k1 = (select k1 from nereids_test_query_db.baseall order by k1 desc limit 1) """ + sql """DROP TABLE IF EXISTS table_1000_undef_undef""" + sql """DROP TABLE IF EXISTS table_1000_undef_undef2""" + sql """CREATE TABLE `table_1000_undef_undef` ( + `pk` int(11) NULL, + `col_bigint_undef_signed` bigint(20) NULL, + `col_bigint_undef_signed2` bigint(20) NULL + ) ENGINE=OLAP + DUPLICATE KEY(`pk`, `col_bigint_undef_signed`, `col_bigint_undef_signed2`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`pk`) BUCKETS 10 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "is_being_synced" = "false", + "storage_format" = "V2", + "light_schema_change" = "true", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false" + ); """ + + sql """ CREATE TABLE `table_1000_undef_undef2` ( + `pk` int(11) NULL, + `col_bigint_undef_signed` bigint(20) NULL, + `col_bigint_undef_signed2` bigint(20) NULL + ) ENGINE=OLAP + DUPLICATE KEY(`pk`, `col_bigint_undef_signed`, `col_bigint_undef_signed2`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`pk`) BUCKETS 10 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "is_being_synced" = "false", + "storage_format" = "V2", + "light_schema_change" = "true", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false" + );""" + explain { + sql """ + SELECT `col_bigint_undef_signed` '00:39:36' , `col_bigint_undef_signed` '11:19:45', `col_bigint_undef_signed` '11:55:37', `col_bigint_undef_signed2` '19:01:23' + FROM table_1000_undef_undef2 + WHERE EXISTS + (SELECT `col_bigint_undef_signed` '17:38:13' , `col_bigint_undef_signed2` '17:36:21' + FROM table_1000_undef_undef2 + WHERE `col_bigint_undef_signed2` NOT IN + (SELECT `col_bigint_undef_signed` + FROM table_1000_undef_undef2 + WHERE `col_bigint_undef_signed2` < + (SELECT AVG(`col_bigint_undef_signed`) + FROM table_1000_undef_undef2 + WHERE `col_bigint_undef_signed2` < 2)) ) ; + """ + contains("VAGGREGATE") + } + + sql """DROP TABLE IF EXISTS table_1000_undef_undef""" + sql """DROP TABLE IF EXISTS table_1000_undef_undef2""" + sql """drop table if exists test_one_row_relation;""" sql """ CREATE TABLE `test_one_row_relation` ( diff --git a/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy b/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy index f64cba1109764c..0e98510e96fedb 100644 --- a/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy +++ b/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy @@ -648,6 +648,34 @@ suite ("sub_query_correlated") { exception "Unsupported correlated subquery with grouping and/or aggregation"; } + qt_doris_7643 """ + SELECT sub_query_correlated_subquery6.* + FROM sub_query_correlated_subquery6 + JOIN sub_query_correlated_subquery7 + ON sub_query_correlated_subquery6.k2 = sub_query_correlated_subquery7.k3 + AND EXISTS + (SELECT sub_query_correlated_subquery8.k1 + FROM sub_query_correlated_subquery8 ) + AND sub_query_correlated_subquery6.k2 IN + (SELECT sub_query_correlated_subquery8.k2 + FROM sub_query_correlated_subquery8 ) + AND sub_query_correlated_subquery6.k1 IN + (SELECT sub_query_correlated_subquery8.k2 + FROM sub_query_correlated_subquery8 + WHERE sub_query_correlated_subquery6.k2 = sub_query_correlated_subquery8.k2 ) + AND sub_query_correlated_subquery7.k3 IN + (SELECT sub_query_correlated_subquery8.k1 + FROM sub_query_correlated_subquery8 ) + AND 10 > + (SELECT min(sub_query_correlated_subquery8.k2) + FROM sub_query_correlated_subquery8 ) + AND sub_query_correlated_subquery7.k3 IN + (SELECT sub_query_correlated_subquery8.k2 + FROM sub_query_correlated_subquery8 + WHERE sub_query_correlated_subquery7.v1 = sub_query_correlated_subquery8.k2 ) + ORDER BY sub_query_correlated_subquery6.k1, sub_query_correlated_subquery6.k2; + """ + // order_qt_doris_6937_2 """ // select * from sub_query_correlated_subquery1 where sub_query_correlated_subquery1.k1 not in (select sub_query_correlated_subquery3.k3 from sub_query_correlated_subquery3 where sub_query_correlated_subquery3.v2 > sub_query_correlated_subquery1.k2) or k1 < 10 order by k1, k2; // """ diff --git a/regression-test/suites/query_p0/join/test_join.groovy b/regression-test/suites/query_p0/join/test_join.groovy index e6d2906c0d3b53..15b010fdfae088 100644 --- a/regression-test/suites/query_p0/join/test_join.groovy +++ b/regression-test/suites/query_p0/join/test_join.groovy @@ -1107,7 +1107,7 @@ suite("test_join", "query,p0") { qt_join_on_predicate7"""SELECT t2.k1,t2.k2,t3.k1,t3.k2 FROM baseall t2 LEFT JOIN test t3 ON t2.k2=t3.k2 WHERE t2.k1 = 4 OR (t2.k1 > 4 AND t3.k1 IS NULL) order by 1, 2, 3, 4""" test { - sql "select a.k1 from baseall a join test b on b.k2 in (select 49) and a.k1 = b.k1 order by k1;" + sql "select /*+ SET_VAR(enable_nereids_planner=false) */ a.k1 from baseall a join test b on b.k2 in (select 49) and a.k1 = b.k1 order by k1;" exception "Not support OnClause contain Subquery" }