Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: support converting json_overlaps/contains to IndexMerge to access MVIndex #40195

Merged
merged 29 commits into from
Jan 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 75 additions & 3 deletions planner/core/indexmerge_path.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ import (
"github.com/pingcap/tidb/parser/model"
"github.com/pingcap/tidb/parser/mysql"
"github.com/pingcap/tidb/planner/util"
"github.com/pingcap/tidb/sessionctx"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/chunk"
"github.com/pingcap/tidb/util/logutil"
"github.com/pingcap/tidb/util/ranger"
"go.uber.org/zap"
Expand Down Expand Up @@ -552,6 +554,7 @@ func (ds *DataSource) generateIndexMergeJSONMVIndexPath(normalPathCnt int, filte

var jsonPath expression.Expression
var vals []expression.Expression
var indexMergeIsIntersection bool
switch sf.FuncName.L {
case ast.JSONMemberOf: // (1 member of a->'$.zip')
jsonPath = sf.GetArgs()[1]
Expand All @@ -560,10 +563,29 @@ func (ds *DataSource) generateIndexMergeJSONMVIndexPath(normalPathCnt int, filte
continue
}
vals = append(vals, v)
case ast.JSONOverlaps: // (json_overlaps(a->'$.zip', '[1, 2, 3]')
continue // TODO: support json_overlaps
case ast.JSONContains: // (json_contains(a->'$.zip', '[1, 2, 3]')
continue // TODO: support json_contains
indexMergeIsIntersection = true
jsonPath = sf.GetArgs()[0]
var ok bool
vals, ok = jsonArrayExpr2Exprs(ds.ctx, sf.GetArgs()[1])
if !ok {
continue
}
case ast.JSONOverlaps: // (json_overlaps(a->'$.zip', '[1, 2, 3]')
var jsonPathIdx int
if sf.GetArgs()[0].Equal(ds.ctx, targetJSONPath) {
jsonPathIdx = 0 // (json_overlaps(a->'$.zip', '[1, 2, 3]')
} else if sf.GetArgs()[1].Equal(ds.ctx, targetJSONPath) {
jsonPathIdx = 1 // (json_overlaps('[1, 2, 3]', a->'$.zip')
} else {
continue
}
jsonPath = sf.GetArgs()[jsonPathIdx]
var ok bool
vals, ok = jsonArrayExpr2Exprs(ds.ctx, sf.GetArgs()[1-jsonPathIdx])
if !ok {
continue
}
default:
continue
}
Expand Down Expand Up @@ -612,12 +634,62 @@ func (ds *DataSource) generateIndexMergeJSONMVIndexPath(normalPathCnt int, filte
partialPaths = append(partialPaths, partialPath)
}
indexMergePath := ds.buildIndexMergeOrPath(filters, partialPaths, filterIdx)
indexMergePath.IndexMergeIsIntersection = indexMergeIsIntersection
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are using buildIndexMergeOrPath to handle both OR and AND cases. Is that expected and correct?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now indexMergeIsIntersection only works for ast.JSONContains. Is it any problem here?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From its name, buildIndexMergeOrPath is for the OR, not AND.

mvIndexPaths = append(mvIndexPaths, indexMergePath)
}
}
return
}

// jsonArrayExpr2Exprs converts a JsonArray expression to expression list: cast('[1, 2, 3]' as JSON) --> []expr{1, 2, 3}
func jsonArrayExpr2Exprs(sctx sessionctx.Context, jsonArrayExpr expression.Expression) ([]expression.Expression, bool) {
// only support cast(const as JSON)
arrayExpr, wrappedByJSONCast := unwrapJSONCast(jsonArrayExpr)
if !wrappedByJSONCast {
return nil, false
}
if _, isConst := arrayExpr.(*expression.Constant); !isConst {
return nil, false
}
if expression.IsMutableEffectsExpr(arrayExpr) {
return nil, false
}

jsonArray, isNull, err := jsonArrayExpr.EvalJSON(sctx, chunk.Row{})
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have two questions about this:

  1. Can we directly use the Constant from the last line to avoid the eval and transform logic here and below?
  2. It seems we need to consider it may be a parameter inside the Constant.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we don't use EvalJSON here, we need to convert the array string like "[1, 2, 3]" to the corresponding slice like [1, 2, 3] manually, which seems dangerous. So just using EvalJSON + JSON.ArrayGetElem seems safer and easier.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it. What about the second question?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I use IsMutableEffectsExpr to check it here.

if isNull || err != nil {
return nil, false
}
if jsonArray.TypeCode != types.JSONTypeCodeArray {
single, ok := jsonValue2Expr(jsonArray) // '1' -> []expr{1}
if ok {
return []expression.Expression{single}, true
}
return nil, false
}
var exprs []expression.Expression
for i := 0; i < jsonArray.GetElemCount(); i++ { // '[1, 2, 3]' -> []expr{1, 2, 3}
expr, ok := jsonValue2Expr(jsonArray.ArrayGetElem(i))
if !ok {
return nil, false
}
exprs = append(exprs, expr)
}
return exprs, true
}

func jsonValue2Expr(v types.BinaryJSON) (expression.Expression, bool) {
if v.TypeCode != types.JSONTypeCodeInt64 {
// only support INT now
// TODO: support more types
return nil, false
}
val := v.GetInt64()
return &expression.Constant{
Value: types.NewDatum(val),
RetType: types.NewFieldType(mysql.TypeLonglong),
}, true
}

func unwrapJSONCast(expr expression.Expression) (expression.Expression, bool) {
if expr == nil {
return nil, false
Expand Down
14 changes: 13 additions & 1 deletion planner/core/testdata/index_merge_suite_in.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,19 @@
"select /*+ use_index_merge(t, j0_1) */ * from t where (1 member of (j0->'$.path1')) and a<10",
"select /*+ use_index_merge(t, j0_1) */ * from t where (1 member of (j0->'$.XXX')) and a<10",
"select /*+ use_index_merge(t, j0_1) */ * from t where (1 member of (j0->'$.path1')) and (2 member of (j1)) and a<10",
"select /*+ use_index_merge(t, j1) */ * from t where (1 member of (j0->'$.path1')) and (2 member of (j1)) and a<10"
"select /*+ use_index_merge(t, j1) */ * from t where (1 member of (j0->'$.path1')) and (2 member of (j1)) and a<10",
"select /*+ use_index_merge(t, j0_0) */ * from t where json_contains((j0->'$.path0'), '[1, 2, 3]')",
"select /*+ use_index_merge(t, j0_0) */ * from t where json_overlaps((j0->'$.path0'), '[1, 2, 3]')",
"select /*+ use_index_merge(t, j0_0) */ * from t where json_overlaps('[1, 2, 3]', (j0->'$.path0'))",
"select /*+ use_index_merge(t, j0_0) */ * from t where json_contains((j0->'$.path0'), '[1, 2, 3]') and a<10",
"select /*+ use_index_merge(t, j0_0) */ * from t where json_overlaps((j0->'$.path0'), '[1, 2, 3]') and a<10",
"select /*+ use_index_merge(t, j0_0) */ * from t where json_overlaps('[1, 2, 3]', (j0->'$.path0')) and a<10",
"select /*+ use_index_merge(t, j0_0) */ * from t where json_contains((j0->'$.path0'), '1')",
"select /*+ use_index_merge(t, j0_0) */ * from t where json_overlaps((j0->'$.path0'), '1')",
"select /*+ use_index_merge(t, j0_0) */ * from t where json_overlaps('1', (j0->'$.path0'))",
"select /*+ use_index_merge(t, j0_0) */ * from t where json_contains((j0->'$.path0'), '1') and a<10",
"select /*+ use_index_merge(t, j0_0) */ * from t where json_overlaps((j0->'$.path0'), '1') and a<10",
"select /*+ use_index_merge(t, j0_0) */ * from t where json_overlaps('1', (j0->'$.path0')) and a<10"
]
},
{
Expand Down
122 changes: 122 additions & 0 deletions planner/core/testdata/index_merge_suite_out.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,128 @@
" └─Selection(Probe) 0.00 cop[tikv] lt(test.t.a, 10)",
" └─TableRowIDScan 0.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ use_index_merge(t, j0_0) */ * from t where json_contains((j0->'$.path0'), '[1, 2, 3]')",
"Plan": [
"IndexMerge 0.00 root type: intersection",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_0(cast(json_extract(`j0`, _utf8mb4'$.path0') as signed array)) range:[1,1], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_0(cast(json_extract(`j0`, _utf8mb4'$.path0') as signed array)) range:[2,2], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_0(cast(json_extract(`j0`, _utf8mb4'$.path0') as signed array)) range:[3,3], keep order:false, stats:pseudo",
"└─TableRowIDScan(Probe) 0.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ use_index_merge(t, j0_0) */ * from t where json_overlaps((j0->'$.path0'), '[1, 2, 3]')",
"Plan": [
"Selection 0.00 root json_overlaps(json_extract(test.t.j0, \"$.path0\"), cast(\"[1, 2, 3]\", json BINARY))",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like this Selection is redundant.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason is ast.JSONOverlaps cannot be pushed down to TiKV (https://github.com/pingcap/tidb/blob/master/expression/expression.go#L1036), and this let the optimizer keeps an Selection upon the DataSrouce (https://github.com/pingcap/tidb/blob/master/planner/core/rule_predicate_push_down.go#L141), which is expected.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But the json_overlaps has already been handled by the IndexMerge.
And json_contains also doesn't have the Selection.

Copy link
Contributor Author

@qw4990 qw4990 Dec 30, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

YES, but it's another problem and I'll submit another PR to fix it after. Otherwise, this PR will become hard to review.

"└─IndexMerge 0.00 root type: union",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_0(cast(json_extract(`j0`, _utf8mb4'$.path0') as signed array)) range:[1,1], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_0(cast(json_extract(`j0`, _utf8mb4'$.path0') as signed array)) range:[2,2], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_0(cast(json_extract(`j0`, _utf8mb4'$.path0') as signed array)) range:[3,3], keep order:false, stats:pseudo",
" └─TableRowIDScan(Probe) 0.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ use_index_merge(t, j0_0) */ * from t where json_overlaps('[1, 2, 3]', (j0->'$.path0'))",
"Plan": [
"Selection 0.00 root json_overlaps(cast(\"[1, 2, 3]\", json BINARY), json_extract(test.t.j0, \"$.path0\"))",
"└─IndexMerge 0.00 root type: union",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_0(cast(json_extract(`j0`, _utf8mb4'$.path0') as signed array)) range:[1,1], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_0(cast(json_extract(`j0`, _utf8mb4'$.path0') as signed array)) range:[2,2], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_0(cast(json_extract(`j0`, _utf8mb4'$.path0') as signed array)) range:[3,3], keep order:false, stats:pseudo",
" └─TableRowIDScan(Probe) 0.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ use_index_merge(t, j0_0) */ * from t where json_contains((j0->'$.path0'), '[1, 2, 3]') and a<10",
"Plan": [
"IndexMerge 0.00 root type: intersection",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_0(cast(json_extract(`j0`, _utf8mb4'$.path0') as signed array)) range:[1,1], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_0(cast(json_extract(`j0`, _utf8mb4'$.path0') as signed array)) range:[2,2], keep order:false, stats:pseudo",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_0(cast(json_extract(`j0`, _utf8mb4'$.path0') as signed array)) range:[3,3], keep order:false, stats:pseudo",
"└─Selection(Probe) 0.00 cop[tikv] lt(test.t.a, 10)",
" └─TableRowIDScan 0.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ use_index_merge(t, j0_0) */ * from t where json_overlaps((j0->'$.path0'), '[1, 2, 3]') and a<10",
"Plan": [
"Selection 0.00 root json_overlaps(json_extract(test.t.j0, \"$.path0\"), cast(\"[1, 2, 3]\", json BINARY))",
"└─IndexMerge 0.00 root type: union",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_0(cast(json_extract(`j0`, _utf8mb4'$.path0') as signed array)) range:[1,1], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_0(cast(json_extract(`j0`, _utf8mb4'$.path0') as signed array)) range:[2,2], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_0(cast(json_extract(`j0`, _utf8mb4'$.path0') as signed array)) range:[3,3], keep order:false, stats:pseudo",
" └─Selection(Probe) 0.00 cop[tikv] lt(test.t.a, 10)",
" └─TableRowIDScan 0.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ use_index_merge(t, j0_0) */ * from t where json_overlaps('[1, 2, 3]', (j0->'$.path0')) and a<10",
"Plan": [
"Selection 0.00 root json_overlaps(cast(\"[1, 2, 3]\", json BINARY), json_extract(test.t.j0, \"$.path0\"))",
"└─IndexMerge 0.00 root type: union",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_0(cast(json_extract(`j0`, _utf8mb4'$.path0') as signed array)) range:[1,1], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_0(cast(json_extract(`j0`, _utf8mb4'$.path0') as signed array)) range:[2,2], keep order:false, stats:pseudo",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_0(cast(json_extract(`j0`, _utf8mb4'$.path0') as signed array)) range:[3,3], keep order:false, stats:pseudo",
" └─Selection(Probe) 0.00 cop[tikv] lt(test.t.a, 10)",
" └─TableRowIDScan 0.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ use_index_merge(t, j0_0) */ * from t where json_contains((j0->'$.path0'), '1')",
"Plan": [
"IndexMerge 0.00 root type: intersection",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_0(cast(json_extract(`j0`, _utf8mb4'$.path0') as signed array)) range:[1,1], keep order:false, stats:pseudo",
"└─TableRowIDScan(Probe) 0.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ use_index_merge(t, j0_0) */ * from t where json_overlaps((j0->'$.path0'), '1')",
"Plan": [
"Selection 0.00 root json_overlaps(json_extract(test.t.j0, \"$.path0\"), cast(\"1\", json BINARY))",
"└─IndexMerge 0.00 root type: union",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_0(cast(json_extract(`j0`, _utf8mb4'$.path0') as signed array)) range:[1,1], keep order:false, stats:pseudo",
" └─TableRowIDScan(Probe) 0.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ use_index_merge(t, j0_0) */ * from t where json_overlaps('1', (j0->'$.path0'))",
"Plan": [
"Selection 0.00 root json_overlaps(cast(\"1\", json BINARY), json_extract(test.t.j0, \"$.path0\"))",
"└─IndexMerge 0.00 root type: union",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_0(cast(json_extract(`j0`, _utf8mb4'$.path0') as signed array)) range:[1,1], keep order:false, stats:pseudo",
" └─TableRowIDScan(Probe) 0.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ use_index_merge(t, j0_0) */ * from t where json_contains((j0->'$.path0'), '1') and a<10",
"Plan": [
"IndexMerge 0.00 root type: intersection",
"├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_0(cast(json_extract(`j0`, _utf8mb4'$.path0') as signed array)) range:[1,1], keep order:false, stats:pseudo",
"└─Selection(Probe) 0.00 cop[tikv] lt(test.t.a, 10)",
" └─TableRowIDScan 0.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ use_index_merge(t, j0_0) */ * from t where json_overlaps((j0->'$.path0'), '1') and a<10",
"Plan": [
"Selection 0.00 root json_overlaps(json_extract(test.t.j0, \"$.path0\"), cast(\"1\", json BINARY))",
"└─IndexMerge 0.00 root type: union",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_0(cast(json_extract(`j0`, _utf8mb4'$.path0') as signed array)) range:[1,1], keep order:false, stats:pseudo",
" └─Selection(Probe) 0.00 cop[tikv] lt(test.t.a, 10)",
" └─TableRowIDScan 0.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ use_index_merge(t, j0_0) */ * from t where json_overlaps('1', (j0->'$.path0')) and a<10",
"Plan": [
"Selection 0.00 root json_overlaps(cast(\"1\", json BINARY), json_extract(test.t.j0, \"$.path0\"))",
"└─IndexMerge 0.00 root type: union",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_0(cast(json_extract(`j0`, _utf8mb4'$.path0') as signed array)) range:[1,1], keep order:false, stats:pseudo",
" └─Selection(Probe) 0.00 cop[tikv] lt(test.t.a, 10)",
" └─TableRowIDScan 0.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
}
]
},
Expand Down