Skip to content

Commit

Permalink
Optimizer: Fix null filtering logic for IN list (#53370)
Browse files Browse the repository at this point in the history
close #49476
  • Loading branch information
ghazalfamilyusa authored May 21, 2024
1 parent def7c23 commit 61a2981
Show file tree
Hide file tree
Showing 6 changed files with 217 additions and 107 deletions.
2 changes: 2 additions & 0 deletions pkg/planner/core/casetest/rule/rule_outer2inner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ func TestOuter2Inner(t *testing.T) {
tk.MustExec("CREATE TABLE part(P_PARTKEY INTEGER,P_BRAND CHAR(10),P_CONTAINER CHAR(10))")
tk.MustExec("CREATE TABLE d (pk int, col_blob blob, col_blob_key blob, col_varchar_key varchar(1) , col_date date, col_int_key int)")
tk.MustExec("CREATE TABLE dd (pk int, col_blob blob, col_blob_key blob, col_date date, col_int_key int)")
tk.MustExec("create table t0 (a0 int, b0 char, c0 char(2))")
tk.MustExec("create table t11 (a1 int, b1 char, c1 char)")

var input Input
var output []struct {
Expand Down
12 changes: 11 additions & 1 deletion pkg/planner/core/casetest/rule/testdata/outer2inner_in.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
"select * from t1 left outer join t2 on a1=a2 where not(b2 is null) -- another form of basic case of not null",
"select * from t1 left outer join t2 on a1=a2 where c2 = 5 OR b2 < 55 -- case with A OR B (Both A and B are null filtering)",
"select * from t1 left outer join t2 on a1=a2 where c2 = 5 AND b2 is null -- case with A AND B (A is null filtering and B is not)",
"select * from t1 left outer join t2 on a1=a2 where b2 is NULL AND c2 = 5 -- case with A AND B (A is null filtering and B is not)",
"select * from t1 left outer join t2 on a1=a2 where not (b2 is NULL OR c2 = 5) -- NOT case ",
"select * from t1 left outer join t2 on a1=a2 where not (b2 is NULL AND c2 = 5) -- NOT case ",
"select * from t2 left outer join t1 on a1=a2 where b1+b1 > 2; -- expression evaluates to UNKNOWN/FALSE",
"select * from t2 left outer join t1 on a1=a2 where coalesce(b1,2) > 2; -- false condition for b1=NULL",
"select * from t2 left outer join t1 on a1=a2 where true and b1 = 5; -- AND with one branch is null filtering",
Expand All @@ -21,6 +24,7 @@
"WITH cte AS ( SELECT alias1.col_date AS field1 FROM d AS alias1 LEFT JOIN dd AS alias2 ON alias1.col_blob_key=alias2.col_blob_key WHERE alias1.col_varchar_key IS NULL OR alias1.col_blob_key >= 'a') DELETE FROM outr1.*, outr2.* USING d AS outr1 LEFT OUTER JOIN dd AS outr2 ON (outr1.col_date=outr2.col_date) JOIN cte AS outrcte ON outr2.col_blob_key=outrcte.field1 -- nested complex case",
"with cte as (select count(a2) as cnt,b2-5 as b3 from t1 left outer join t2 on a1=a2 group by b3) select * from cte where b3 > 1 -- aggregate case.",
"select * from dd as outr1 WHERE outr1.col_blob IN (SELECT DISTINCT innr1.col_blob_key AS y FROM d AS innrcte left outer join dd AS innr1 ON innr1.pk = innrcte.col_date WHERE outr1.col_int_key > 6)",
"select * from t0 left outer join t11 on a0=a1 where t0.b0 in (t11.b1, t11.c1) -- each = in the in list is null filtering",
"select * from t1 left outer join t2 on a1=a2 where b2 is null -- negative case with single predicate which is not null filtering",
"select * from t1 left outer join t2 on a1=a2 where c2 = 5 OR b2 is null -- negative case with A OR B (A is null filtering and B is not)",
"select * from t1 left outer join t2 on a1=a2 where not(b2 is not null) -- nested 'not' negative case",
Expand All @@ -37,7 +41,13 @@
"SELECT * FROM ti LEFT JOIN (SELECT i FROM ti WHERE FALSE) AS d1 ON ti.i = d1.i WHERE NOT EXISTS (SELECT 1 FROM ti AS inner_t1 WHERE i = d1.i) -- anti semi join",
"select count(*) from t1 where t1.a1+100 > ( select count(*) from t2 where t1.a1=t2.a2 and t1.b1=t2.b2) group by t1.b1 -- filter not filtering over derived outer join",
"with cte as (select count(a2) as cnt,ifnull(b2,5) as b2 from t1 left outer join t2 on a1=a2 group by b2) select * from cte where b2 > 1 -- non null filter on group by",
"with cte as (select count(a2) as cnt,ifnull(b2,5) as b2 from t1 left outer join t2 on a1=a2 group by b2) select * from cte where cnt > 1 -- filter on aggregates not applicable"
"with cte as (select count(a2) as cnt,ifnull(b2,5) as b2 from t1 left outer join t2 on a1=a2 group by b2) select * from cte where cnt > 1 -- filter on aggregates not applicable",
"select * from t0 left outer join t11 on a0=a1 where t0.b0 in (t0.b0, t11.b1)",
"select * from t0 left outer join t11 on a0=a1 where '5' not in (t0.b0, t11.b1)",
"select * from t0 left outer join t11 on a0=a1 where '1' in (t0.b0, t11.b1)",
"select * from t0 left outer join t11 on a0=a1 where t0.b0 in ('5', t11.b1) -- some = in the in list is not null filtering",
"select * from t0 left outer join t11 on a0=a1 where '5' in (t0.b0, t11.b1) -- some = in the in list is not null filtering",
"select * from t1 left outer join t2 on a1=a2 where not (b2 is NOT NULL AND c2 = 5) -- NOT case "
]
}
]
123 changes: 123 additions & 0 deletions pkg/planner/core/casetest/rule/testdata/outer2inner_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,45 @@
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
]
},
{
"SQL": "select * from t1 left outer join t2 on a1=a2 where b2 is NULL AND c2 = 5 -- case with A AND B (A is null filtering and B is not)",
"Plan": [
"Projection 0.01 root test.t1.a1, test.t1.b1, test.t1.c1, test.t2.a2, test.t2.b2, test.t2.c2",
"└─HashJoin 0.01 root inner join, equal:[eq(test.t2.a2, test.t1.a1)]",
" ├─TableReader(Build) 0.01 root data:Selection",
" │ └─Selection 0.01 cop[tikv] eq(test.t2.c2, 5), isnull(test.t2.b2), not(isnull(test.t2.a2))",
" │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
" └─TableReader(Probe) 9990.00 root data:Selection",
" └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a1))",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
]
},
{
"SQL": "select * from t1 left outer join t2 on a1=a2 where not (b2 is NULL OR c2 = 5) -- NOT case ",
"Plan": [
"Projection 9990.00 root test.t1.a1, test.t1.b1, test.t1.c1, test.t2.a2, test.t2.b2, test.t2.c2",
"└─HashJoin 9990.00 root inner join, equal:[eq(test.t2.a2, test.t1.a1)]",
" ├─TableReader(Build) 7992.00 root data:Selection",
" │ └─Selection 7992.00 cop[tikv] and(not(isnull(test.t2.b2)), ne(test.t2.c2, 5)), not(isnull(test.t2.a2))",
" │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
" └─TableReader(Probe) 9990.00 root data:Selection",
" └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a1))",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
]
},
{
"SQL": "select * from t1 left outer join t2 on a1=a2 where not (b2 is NULL AND c2 = 5) -- NOT case ",
"Plan": [
"Projection 12483.33 root test.t1.a1, test.t1.b1, test.t1.c1, test.t2.a2, test.t2.b2, test.t2.c2",
"└─HashJoin 12483.33 root inner join, equal:[eq(test.t2.a2, test.t1.a1)]",
" ├─TableReader(Build) 9986.66 root data:Selection",
" │ └─Selection 9986.66 cop[tikv] not(isnull(test.t2.a2)), or(not(isnull(test.t2.b2)), ne(test.t2.c2, 5))",
" │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
" └─TableReader(Probe) 9990.00 root data:Selection",
" └─Selection 9990.00 cop[tikv] not(isnull(test.t1.a1))",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
]
},
{
"SQL": "select * from t2 left outer join t1 on a1=a2 where b1+b1 > 2; -- expression evaluates to UNKNOWN/FALSE",
"Plan": [
Expand Down Expand Up @@ -297,6 +336,18 @@
" └─TableFullScan 99900000.00 cop[tikv] table:innr1 keep order:false, stats:pseudo"
]
},
{
"SQL": "select * from t0 left outer join t11 on a0=a1 where t0.b0 in (t11.b1, t11.c1) -- each = in the in list is null filtering",
"Plan": [
"HashJoin 12487.50 root inner join, equal:[eq(test.t0.a0, test.t11.a1)], other cond:in(test.t0.b0, test.t11.b1, test.t11.c1)",
"├─TableReader(Build) 9990.00 root data:Selection",
"│ └─Selection 9990.00 cop[tikv] not(isnull(test.t11.a1))",
"│ └─TableFullScan 10000.00 cop[tikv] table:t11 keep order:false, stats:pseudo",
"└─TableReader(Probe) 9990.00 root data:Selection",
" └─Selection 9990.00 cop[tikv] not(isnull(test.t0.a0))",
" └─TableFullScan 10000.00 cop[tikv] table:t0 keep order:false, stats:pseudo"
]
},
{
"SQL": "select * from t1 left outer join t2 on a1=a2 where b2 is null -- negative case with single predicate which is not null filtering",
"Plan": [
Expand Down Expand Up @@ -526,6 +577,78 @@
" └─Selection 9990.00 cop[tikv] not(isnull(test.t2.a2))",
" └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo"
]
},
{
"SQL": "select * from t0 left outer join t11 on a0=a1 where t0.b0 in (t0.b0, t11.b1)",
"Plan": [
"Selection 9990.00 root in(test.t0.b0, test.t0.b0, test.t11.b1)",
"└─HashJoin 12487.50 root left outer join, equal:[eq(test.t0.a0, test.t11.a1)]",
" ├─TableReader(Build) 9990.00 root data:Selection",
" │ └─Selection 9990.00 cop[tikv] not(isnull(test.t11.a1))",
" │ └─TableFullScan 10000.00 cop[tikv] table:t11 keep order:false, stats:pseudo",
" └─TableReader(Probe) 10000.00 root data:TableFullScan",
" └─TableFullScan 10000.00 cop[tikv] table:t0 keep order:false, stats:pseudo"
]
},
{
"SQL": "select * from t0 left outer join t11 on a0=a1 where '5' not in (t0.b0, t11.b1)",
"Plan": [
"Selection 9990.00 root not(in(\"5\", test.t0.b0, test.t11.b1))",
"└─HashJoin 12487.50 root left outer join, equal:[eq(test.t0.a0, test.t11.a1)]",
" ├─TableReader(Build) 9990.00 root data:Selection",
" │ └─Selection 9990.00 cop[tikv] not(isnull(test.t11.a1))",
" │ └─TableFullScan 10000.00 cop[tikv] table:t11 keep order:false, stats:pseudo",
" └─TableReader(Probe) 10000.00 root data:TableFullScan",
" └─TableFullScan 10000.00 cop[tikv] table:t0 keep order:false, stats:pseudo"
]
},
{
"SQL": "select * from t0 left outer join t11 on a0=a1 where '1' in (t0.b0, t11.b1)",
"Plan": [
"Selection 9990.00 root in(\"1\", test.t0.b0, test.t11.b1)",
"└─HashJoin 12487.50 root left outer join, equal:[eq(test.t0.a0, test.t11.a1)]",
" ├─TableReader(Build) 9990.00 root data:Selection",
" │ └─Selection 9990.00 cop[tikv] not(isnull(test.t11.a1))",
" │ └─TableFullScan 10000.00 cop[tikv] table:t11 keep order:false, stats:pseudo",
" └─TableReader(Probe) 10000.00 root data:TableFullScan",
" └─TableFullScan 10000.00 cop[tikv] table:t0 keep order:false, stats:pseudo"
]
},
{
"SQL": "select * from t0 left outer join t11 on a0=a1 where t0.b0 in ('5', t11.b1) -- some = in the in list is not null filtering",
"Plan": [
"Selection 9990.00 root in(test.t0.b0, \"5\", test.t11.b1)",
"└─HashJoin 12487.50 root left outer join, equal:[eq(test.t0.a0, test.t11.a1)]",
" ├─TableReader(Build) 9990.00 root data:Selection",
" │ └─Selection 9990.00 cop[tikv] not(isnull(test.t11.a1))",
" │ └─TableFullScan 10000.00 cop[tikv] table:t11 keep order:false, stats:pseudo",
" └─TableReader(Probe) 10000.00 root data:TableFullScan",
" └─TableFullScan 10000.00 cop[tikv] table:t0 keep order:false, stats:pseudo"
]
},
{
"SQL": "select * from t0 left outer join t11 on a0=a1 where '5' in (t0.b0, t11.b1) -- some = in the in list is not null filtering",
"Plan": [
"Selection 9990.00 root in(\"5\", test.t0.b0, test.t11.b1)",
"└─HashJoin 12487.50 root left outer join, equal:[eq(test.t0.a0, test.t11.a1)]",
" ├─TableReader(Build) 9990.00 root data:Selection",
" │ └─Selection 9990.00 cop[tikv] not(isnull(test.t11.a1))",
" │ └─TableFullScan 10000.00 cop[tikv] table:t11 keep order:false, stats:pseudo",
" └─TableReader(Probe) 10000.00 root data:TableFullScan",
" └─TableFullScan 10000.00 cop[tikv] table:t0 keep order:false, stats:pseudo"
]
},
{
"SQL": "select * from t1 left outer join t2 on a1=a2 where not (b2 is NOT NULL AND c2 = 5) -- NOT case ",
"Plan": [
"Selection 9990.00 root not(and(not(isnull(test.t2.b2)), eq(test.t2.c2, 5)))",
"└─HashJoin 12487.50 root left outer join, equal:[eq(test.t1.a1, test.t2.a2)]",
" ├─TableReader(Build) 9990.00 root data:Selection",
" │ └─Selection 9990.00 cop[tikv] not(isnull(test.t2.a2))",
" │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
" └─TableReader(Probe) 10000.00 root data:TableFullScan",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"
]
}
]
}
Expand Down
51 changes: 1 addition & 50 deletions pkg/planner/core/rule_outer_to_inner_join.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (
"context"

"github.com/pingcap/tidb/pkg/expression"
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/planner/core/base"
"github.com/pingcap/tidb/pkg/planner/util"
"github.com/pingcap/tidb/pkg/planner/util/optimizetrace"
Expand Down Expand Up @@ -87,7 +86,7 @@ func (p *LogicalJoin) ConvertOuterToInnerJoin(predicates []expression.Expression
if p.JoinType == LeftOuterJoin || p.JoinType == RightOuterJoin {
canBeSimplified := false
for _, expr := range predicates {
isOk := isNullFiltered(p.SCtx(), innerTable.Schema(), expr)
isOk := util.IsNullRejected(p.SCtx(), innerTable.Schema(), expr)
if isOk {
canBeSimplified = true
break
Expand Down Expand Up @@ -149,51 +148,3 @@ func (s *LogicalProjection) ConvertOuterToInnerJoin(predicates []expression.Expr
p.SetChildren(child)
return p
}

// allConstants reports whether expr is made up solely of constants, i.e. it is
// either a constant itself or a scalar function whose arguments are all
// (recursively) constant. Any other kind of expression yields false.
func allConstants(ctx expression.BuildContext, expr expression.Expression) bool {
	// Bail out first when the plan-cache check flags the expression
	// (expression contains non-deterministic parameter).
	if expression.MaybeOverOptimized4PlanCache(ctx, []expression.Expression{expr}) {
		return false
	}

	// A bare constant trivially qualifies.
	if _, isConst := expr.(*expression.Constant); isConst {
		return true
	}

	// Anything that is neither a constant nor a scalar function does not qualify.
	fn, isFunc := expr.(*expression.ScalarFunction)
	if !isFunc {
		return false
	}

	// A scalar function qualifies only when every argument does.
	for _, child := range fn.GetArgs() {
		if !allConstants(ctx, child) {
			return false
		}
	}
	return true
}

// isNullFiltered reports whether predicate is null filtering with respect to
// innerSchema. Logical connectives are decomposed recursively:
//
//	isNullFiltered(A OR B)  = isNullFiltered(A) AND isNullFiltered(B)
//	isNullFiltered(A AND B) = isNullFiltered(A) OR isNullFiltered(B)
//
// Every other predicate is delegated to util.IsNullRejected.
func isNullFiltered(ctx base.PlanContext, innerSchema *expression.Schema, predicate expression.Expression) bool {
	// The expression should reference at least one field in innerSchema or all constants.
	if !expression.ExprReferenceSchema(predicate, innerSchema) && !allConstants(ctx.GetExprCtx(), predicate) {
		return false
	}

	if fn, ok := predicate.(*expression.ScalarFunction); ok {
		switch fn.FuncName.L {
		case ast.LogicAnd:
			// A conjunction is null filtering as soon as either side is.
			// Both operands must be inspected: checking only the left one
			// misses cases such as `b2 IS NULL AND c2 = 5`.
			args := fn.GetArgs()
			return isNullFiltered(ctx, innerSchema, args[0]) ||
				isNullFiltered(ctx, innerSchema, args[1])
		case ast.LogicOr:
			// A disjunction is null filtering only when both sides are.
			args := fn.GetArgs()
			return isNullFiltered(ctx, innerSchema, args[0]) &&
				isNullFiltered(ctx, innerSchema, args[1])
		}
	}
	return util.IsNullRejected(ctx, innerSchema, predicate)
}
1 change: 1 addition & 0 deletions pkg/planner/util/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ go_library(
"//pkg/parser/model",
"//pkg/parser/mysql",
"//pkg/planner/context",
"//pkg/planner/core/base",
"//pkg/sessionctx/stmtctx",
"//pkg/tablecodec",
"//pkg/types",
Expand Down
Loading

0 comments on commit 61a2981

Please sign in to comment.