Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: support converting json_member_of to IndexMerge to access MVIndex #40175

Merged
merged 14 commits into from
Dec 28, 2022
3 changes: 2 additions & 1 deletion planner/core/expression_rewriter.go
Original file line number Diff line number Diff line change
Expand Up @@ -1184,7 +1184,8 @@ func (er *expressionRewriter) Leave(originInNode ast.Node) (retNode ast.Node, ok
er.disableFoldCounter--
}
case *ast.FuncCastExpr:
if v.Tp.IsArray() && !er.b.allowBuildCastArray {
allowBuildCastArray4Test := er.ctx.Value("____allow_build_cast_array_for_test") != nil
if v.Tp.IsArray() && !er.b.allowBuildCastArray && !allowBuildCastArray4Test {
er.err = expression.ErrNotSupportedYet.GenWithStackByArgs("Use of CAST( .. AS .. ARRAY) outside of functional index in CREATE(non-SELECT)/ALTER TABLE or in general expressions")
return retNode, false
}
Expand Down
6 changes: 6 additions & 0 deletions planner/core/find_best_task.go
Original file line number Diff line number Diff line change
Expand Up @@ -1419,6 +1419,12 @@ func (ds *DataSource) addSelection4PlanCache(task *rootTask, stats *property.Sta
// convertToIndexScan converts the DataSource to index scan with idx.
func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty,
candidate *candidatePath, _ *physicalOptimizeOp) (task task, err error) {
if candidate.path.Index.MVIndex {
// MVIndex is special: different index rows may return the same _row_id, which can break some assumptions of IndexReader.
// Currently, an MVIndex can only be accessed through IndexMerge, not through IndexReader.
// TODO: make IndexReader support accessing MVIndex directly.
return invalidTask, nil
}
if !candidate.path.IsSingleScan {
// If it's parent requires single read task, return max cost.
if prop.TaskTp == property.CopSingleReadTaskType {
Expand Down
162 changes: 160 additions & 2 deletions planner/core/indexmerge_path.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/pingcap/tidb/parser/model"
"github.com/pingcap/tidb/parser/mysql"
"github.com/pingcap/tidb/planner/util"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/logutil"
"github.com/pingcap/tidb/util/ranger"
"go.uber.org/zap"
Expand Down Expand Up @@ -63,7 +64,19 @@ func (ds *DataSource) generateIndexMergePath() error {
_, remaining := expression.PushDownExprs(stmtCtx, indexMergeConds, ds.ctx.GetClient(), kv.UnSpecified)
stmtCtx.SetWarnings(warnings)
stmtCtx.SetExtraWarnings(extraWarnings)
if len(remaining) != 0 {

remainingExpr := 0
for _, expr := range remaining {
// Handle these 3 functions specially since they can be used to access MVIndex.
if sf, ok := expr.(*expression.ScalarFunction); ok {
if sf.FuncName.L == ast.JSONMemberOf || sf.FuncName.L == ast.JSONOverlaps ||
sf.FuncName.L == ast.JSONContains {
continue
}
}
remainingExpr++
}
if remainingExpr > 0 {
needConsiderIndexMerge = false
}
}
Expand Down Expand Up @@ -435,8 +448,16 @@ func (ds *DataSource) generateAndPruneIndexMergePath(indexMergeConds []expressio
if indexMergeAndPath != nil {
ds.possibleAccessPaths = append(ds.possibleAccessPaths, indexMergeAndPath)
}
// 3. Generate possible IndexMerge paths for MVIndex.
mvIndexMergePath, err := ds.generateIndexMergeJSONMVIndexPath(regularPathCount, indexMergeConds)
if err != nil {
return err
}
if mvIndexMergePath != nil {
ds.possibleAccessPaths = append(ds.possibleAccessPaths, mvIndexMergePath...)
}

// 3. If needed, append a warning if no IndexMerge is generated.
// 4. If needed, append a warning if no IndexMerge is generated.

// If without hints, it means that `enableIndexMerge` is true
if len(ds.indexMergeHints) == 0 {
Expand Down Expand Up @@ -467,3 +488,140 @@ func (ds *DataSource) generateAndPruneIndexMergePath(indexMergeConds []expressio
}
return nil
}

// generateIndexMergeJSONMVIndexPath generates paths for (json_member_of / json_overlaps / json_contains) on multi-valued index.
/*
1. select * from t where 1 member of (a)
IndexMerge(AND)
IndexRangeScan(a, [1,1])
TableRowIdScan(t)
2. select * from t where json_contains(a, '[1, 2, 3]')
IndexMerge(AND)
IndexRangeScan(a, [1,1])
IndexRangeScan(a, [2,2])
IndexRangeScan(a, [3,3])
TableRowIdScan(t)
3. select * from t where json_overlap(a, '[1, 2, 3]')
IndexMerge(OR)
IndexRangeScan(a, [1,1])
IndexRangeScan(a, [2,2])
IndexRangeScan(a, [3,3])
TableRowIdScan(t)
*/
func (ds *DataSource) generateIndexMergeJSONMVIndexPath(normalPathCnt int, filters []expression.Expression) (mvIndexPaths []*util.AccessPath, err error) {
for idx := 0; idx < normalPathCnt; idx++ {
if ds.possibleAccessPaths[idx].IsTablePath() || ds.possibleAccessPaths[idx].Index == nil || !ds.possibleAccessPaths[idx].Index.MVIndex {
continue // not a MVIndex path
}
if !ds.isSpecifiedInIndexMergeHints(ds.possibleAccessPaths[idx].Index.Name.L) {
continue // for safety, only consider using MVIndex when there is a `use_index_merge` hint now.
// TODO: remove this limitation
Copy link
Contributor Author

@qw4990 qw4990 Dec 26, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll remove the limitations and TODOs below in the following PRs.

}

// Step 1. Extract the underlying JSON column from MVIndex Info.
mvIndex := ds.possibleAccessPaths[idx].Index
mvVirColOffset := mvIndex.Columns[0].Offset // MVIndex has and only has 1 vir-col: index idx((cast(a->'$.zip' as signed array)))
xiongjiwei marked this conversation as resolved.
Show resolved Hide resolved
mvVirCol := ds.table.Meta().Cols()[mvVirColOffset]

var virCol *expression.Column
for _, ce := range ds.TblCols {
if ce.ID == mvVirCol.ID {
virCol = ce.Clone().(*expression.Column)
virCol.GetType().SetArray(false) // JSON-ARRAY(INT) --> INT
xiongjiwei marked this conversation as resolved.
Show resolved Hide resolved
break
}
}
// unwrap the outside cast: cast(json_extract(test.t.a, $.zip), JSON) --> json_extract(test.t.a, $.zip)
qw4990 marked this conversation as resolved.
Show resolved Hide resolved
targetJSONPath, ok := unwrapCast(virCol.VirtualExpr)
qw4990 marked this conversation as resolved.
Show resolved Hide resolved
if !ok {
continue
}

// Step 2. Iterate all filters and generate corresponding IndexMerge paths.
for i, filter := range filters {
// Step 2.1. Extract jsonPath and vals from json_member / json_overlaps / json_contains functions.
sf, ok := filter.(*expression.ScalarFunction)
if !ok {
continue
}

var jsonPath expression.Expression
var vals []expression.Expression
switch sf.FuncName.L {
case ast.JSONMemberOf: // (1 member of a->'$.zip')
jsonPath = sf.GetArgs()[1]
v, ok := unwrapCast(sf.GetArgs()[0]) // cast(1 as json) --> 1
qw4990 marked this conversation as resolved.
Show resolved Hide resolved
if !ok {
continue
}
vals = append(vals, v)
case ast.JSONOverlaps: // (json_overlaps(a->'$.zip', '[1, 2, 3]')
continue // TODO: support json_overlaps
case ast.JSONContains: // (json_contains(a->'$.zip', '[1, 2, 3]')
continue // TODO: support json_contains
default:
continue
}

// Step 2.2. Check some limitations.
if jsonPath == nil || len(vals) == 0 {
continue
}
if !jsonPath.Equal(ds.ctx, targetJSONPath) {
continue // not on the same JSON col
}
// only support INT now
// TODO: support more types
if jsonPath.GetType().EvalType() == types.ETInt {
continue
}
allInt := true
for _, v := range vals {
if v.GetType().EvalType() != types.ETInt {
allInt = false
}
}
if !allInt {
continue
}

// Step 2.3. Generate a IndexMerge Path of this filter on the current MVIndex.
var partialPaths []*util.AccessPath
for _, v := range vals {
qw4990 marked this conversation as resolved.
Show resolved Hide resolved
partialPath := &util.AccessPath{Index: mvIndex}
partialPath.Ranges = ranger.FullRange()
// TODO: get the actual column length of this virtual column
partialPath.IdxCols, partialPath.IdxColLens = []*expression.Column{virCol}, []int{types.UnspecifiedLength}
partialPath.FullIdxCols, partialPath.FullIdxColLens = []*expression.Column{virCol}, []int{types.UnspecifiedLength}

// calculate the path range with the condition `a->'$.zip' = 1`.
eq, err := expression.NewFunction(ds.ctx, ast.EQ, types.NewFieldType(mysql.TypeTiny), virCol, v)
if err != nil {
return nil, err
}
if err = ds.detachCondAndBuildRangeForPath(partialPath, []expression.Expression{eq}); err != nil {
return nil, err
}

partialPaths = append(partialPaths, partialPath)
}
indexMergePath := ds.buildIndexMergeOrPath(filters, partialPaths, i)
mvIndexPaths = append(mvIndexPaths, indexMergePath)
}
}
return
}

func unwrapCast(expr expression.Expression) (expression.Expression, bool) {
qw4990 marked this conversation as resolved.
Show resolved Hide resolved
if expr == nil {
return nil, false
}
sf, ok := expr.(*expression.ScalarFunction)
if !ok {
return nil, false
}
if sf == nil || sf.FuncName.L != ast.Cast {
return nil, false
}
return sf.GetArgs()[0], true
}
55 changes: 55 additions & 0 deletions planner/core/indexmerge_path_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Copyright 2022 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package core_test

import (
"context"
"testing"

"github.com/pingcap/tidb/planner/core"
"github.com/pingcap/tidb/testkit"
"github.com/pingcap/tidb/testkit/testdata"
)

// TestIndexMergeJSONMemberOf runs `explain format = 'brief'` on every query of the
// index-merge suite against a table with three multi-valued indexes and compares the
// resulting plan rows with the recorded output.
func TestIndexMergeJSONMemberOf(t *testing.T) {
	mockStore := testkit.CreateMockStore(t)
	kit := testkit.NewTestKit(t, mockStore)
	kit.MustExec("use test")
	kit.MustExec(`create table t(
a int, j0 json, j1 json,
index j0_0((cast(j0->'$.path0' as signed array))),
index j0_1((cast(j0->'$.path1' as signed array))),
index j1((cast(j1 as signed array))))`)

	var (
		queries  []string
		expected []struct {
			SQL  string
			Plan []string
		}
	)
	suite := core.GetIndexMergeSuiteData()
	suite.LoadTestCases(t, &queries, &expected)

	// The context value is the test-only switch that permits CAST(... AS ... ARRAY)
	// in a general expression while the statement is being planned.
	ctx := context.WithValue(context.Background(), "____allow_build_cast_array_for_test", struct{}{})
	for i := range queries {
		sql := queries[i]
		testdata.OnRecord(func() {
			expected[i].SQL = sql
		})
		explainStmt := "explain format = 'brief' " + sql
		rows := kit.MustQueryWithContext(ctx, explainStmt)
		testdata.OnRecord(func() {
			expected[i].Plan = testdata.ConvertRowsToStrings(rows.Rows())
		})
		rows.Check(testkit.Rows(expected[i].Plan...))
	}
}
10 changes: 10 additions & 0 deletions planner/core/testdata/index_merge_suite_in.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
[
{
"name": "TestIndexMergeJSONMemberOf",
"cases": [
"select /*+ use_index_merge(t, j0_0) */ * from t where (1 member of (j0->'$.path0'))",
"select /*+ use_index_merge(t, j0_1) */ * from t where (1 member of (j0->'$.path1')) and a<10",
"select /*+ use_index_merge(t, j0_1) */ * from t where (1 member of (j0->'$.XXX')) and a<10",
"select /*+ use_index_merge(t, j0_1) */ * from t where (1 member of (j0->'$.path1')) and (2 member of (j1)) and a<10",
"select /*+ use_index_merge(t, j1) */ * from t where (1 member of (j0->'$.path1')) and (2 member of (j1)) and a<10"
]
},
{
"name": "TestIndexMergePathGeneration",
"cases": [
Expand Down
53 changes: 53 additions & 0 deletions planner/core/testdata/index_merge_suite_out.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,57 @@
[
{
"Name": "TestIndexMergeJSONMemberOf",
"Cases": [
{
"SQL": "select /*+ use_index_merge(t, j0_0) */ * from t where (1 member of (j0->'$.path0'))",
"Plan": [
"Selection 0.00 root json_memberof(cast(1, json BINARY), json_extract(test.t.j0, \"$.path0\"))",
"└─IndexMerge 0.00 root type: union",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_0(cast(json_extract(`j0`, _utf8mb4'$.path0') as signed array)) range:[1,1], keep order:false, stats:pseudo",
" └─TableRowIDScan(Probe) 0.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ use_index_merge(t, j0_1) */ * from t where (1 member of (j0->'$.path1')) and a<10",
"Plan": [
"Selection 0.00 root json_memberof(cast(1, json BINARY), json_extract(test.t.j0, \"$.path1\"))",
"└─IndexMerge 0.00 root type: union",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_1(cast(json_extract(`j0`, _utf8mb4'$.path1') as signed array)) range:[1,1], keep order:false, stats:pseudo",
" └─Selection(Probe) 0.00 cop[tikv] lt(test.t.a, 10)",
" └─TableRowIDScan 0.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ use_index_merge(t, j0_1) */ * from t where (1 member of (j0->'$.XXX')) and a<10",
"Plan": [
"Selection 2658.67 root json_memberof(cast(1, json BINARY), json_extract(test.t.j0, \"$.XXX\"))",
"└─TableReader 3323.33 root data:Selection",
" └─Selection 3323.33 cop[tikv] lt(test.t.a, 10)",
" └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ use_index_merge(t, j0_1) */ * from t where (1 member of (j0->'$.path1')) and (2 member of (j1)) and a<10",
"Plan": [
"Selection 0.00 root json_memberof(cast(1, json BINARY), json_extract(test.t.j0, \"$.path1\")), json_memberof(cast(2, json BINARY), test.t.j1)",
"└─IndexMerge 0.00 root type: union",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_1(cast(json_extract(`j0`, _utf8mb4'$.path1') as signed array)) range:[1,1], keep order:false, stats:pseudo",
" └─Selection(Probe) 0.00 cop[tikv] lt(test.t.a, 10)",
" └─TableRowIDScan 0.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ use_index_merge(t, j1) */ * from t where (1 member of (j0->'$.path1')) and (2 member of (j1)) and a<10",
"Plan": [
"Selection 0.00 root json_memberof(cast(1, json BINARY), json_extract(test.t.j0, \"$.path1\")), json_memberof(cast(2, json BINARY), test.t.j1)",
"└─IndexMerge 0.00 root type: union",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j1(cast(`j1` as signed array)) range:[2,2], keep order:false, stats:pseudo",
" └─Selection(Probe) 0.00 cop[tikv] lt(test.t.a, 10)",
" └─TableRowIDScan 0.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
}
]
},
{
"Name": "TestIndexMergePathGeneration",
"Cases": [
Expand Down