From c42b9f26a996dc378aefc584c28eee2fc5c1d49e Mon Sep 17 00:00:00 2001 From: starocean999 <40539150+starocean999@users.noreply.github.com> Date: Fri, 17 Nov 2023 12:31:42 +0800 Subject: [PATCH] [fix](nereids) count in correlated subquery should not output null value (#27064) Consider the SQL: SELECT * FROM t1 WHERE t1.a <= (SELECT COUNT(t2.a) FROM t2 WHERE (t1.b = t2.b)); When unnesting a correlated subquery, we create a left join node. Assume the outer query is the left table and the subquery is the right one. If there is no match, the row from the right table is filled with nulls. But the COUNT function is never nullable. So wrap COUNT with Nvl to ensure its result is 0 instead of null to get the correct result. --- .../expression/rules/FunctionBinder.java | 19 ++++++++++++++++++- .../nereids_p0/subquery/test_subquery.groovy | 5 +++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FunctionBinder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FunctionBinder.java index ac6a83f9cf46806..30e36c273fe4451 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FunctionBinder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FunctionBinder.java @@ -49,8 +49,11 @@ import org.apache.doris.nereids.trees.expressions.WhenClause; import org.apache.doris.nereids.trees.expressions.functions.BoundFunction; import org.apache.doris.nereids.trees.expressions.functions.FunctionBuilder; +import org.apache.doris.nereids.trees.expressions.functions.agg.Count; import org.apache.doris.nereids.trees.expressions.functions.scalar.Lambda; +import org.apache.doris.nereids.trees.expressions.functions.scalar.Nvl; import org.apache.doris.nereids.trees.expressions.functions.udf.AliasUdfBuilder; +import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral; import 
org.apache.doris.nereids.trees.expressions.typecoercion.ImplicitCastInputTypes; import org.apache.doris.nereids.types.ArrayType; import org.apache.doris.nereids.types.BigIntType; @@ -167,7 +170,21 @@ public Expression visitUnboundFunction(UnboundFunction unboundFunction, Expressi // we do type coercion in build function in alias function, so it's ok to return directly. return builder.build(functionName, arguments); } else { - return TypeCoercionUtils.processBoundFunction((BoundFunction) builder.build(functionName, arguments)); + Expression boundFunction = TypeCoercionUtils + .processBoundFunction((BoundFunction) builder.build(functionName, arguments)); + if (boundFunction instanceof Count + && context.cascadesContext.getOuterScope().isPresent() + && !context.cascadesContext.getOuterScope().get().getCorrelatedSlots() + .isEmpty()) { + // consider sql: SELECT * FROM t1 WHERE t1.a <= (SELECT COUNT(t2.a) FROM t2 WHERE (t1.b = t2.b)); + // when unnest correlated subquery, we create a left join node. + // outer query is left table and subquery is right one + // if there is no match, the row from right table is filled with nulls + // but COUNT function is always not nullable. 
+ // so wrap COUNT with Nvl to ensure it's result is 0 instead of null to get the correct result + boundFunction = new Nvl(boundFunction, new BigIntLiteral(0)); + } + return boundFunction; } } diff --git a/regression-test/suites/nereids_p0/subquery/test_subquery.groovy b/regression-test/suites/nereids_p0/subquery/test_subquery.groovy index 01e347031f82ef6..7170fd20d57019f 100644 --- a/regression-test/suites/nereids_p0/subquery/test_subquery.groovy +++ b/regression-test/suites/nereids_p0/subquery/test_subquery.groovy @@ -118,6 +118,11 @@ suite("test_subquery") { contains("VAGGREGATE") } + explain { + sql """SELECT * FROM table_1000_undef_undef t1 WHERE t1.pk <= (SELECT COUNT(t2.pk) FROM table_1000_undef_undef2 t2 WHERE (t1.col_bigint_undef_signed = t2.col_bigint_undef_signed)); """ + contains("ifnull") + } + sql """DROP TABLE IF EXISTS table_1000_undef_undef""" sql """DROP TABLE IF EXISTS table_1000_undef_undef2"""