Skip to content

Commit

Permalink
planner: support converting json_member_of to IndexMerge to access …
Browse files Browse the repository at this point in the history
…MVIndex (#40175)

ref #40191
  • Loading branch information
qw4990 authored Dec 28, 2022
1 parent b268c65 commit f9af75f
Show file tree
Hide file tree
Showing 7 changed files with 292 additions and 2 deletions.
1 change: 1 addition & 0 deletions planner/core/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ go_test(
"flat_plan_test.go",
"fragment_test.go",
"indexmerge_intersection_test.go",
"indexmerge_path_test.go",
"indexmerge_test.go",
"integration_partition_test.go",
"integration_test.go",
Expand Down
6 changes: 6 additions & 0 deletions planner/core/find_best_task.go
Original file line number Diff line number Diff line change
Expand Up @@ -1419,6 +1419,12 @@ func (ds *DataSource) addSelection4PlanCache(task *rootTask, stats *property.Sta
// convertToIndexScan converts the DataSource to index scan with idx.
func (ds *DataSource) convertToIndexScan(prop *property.PhysicalProperty,
candidate *candidatePath, _ *physicalOptimizeOp) (task task, err error) {
if candidate.path.Index.MVIndex {
// MVIndex is special since different index rows may return the same _row_id and this can break some assumptions of IndexReader.
// Currently only support using IndexMerge to access MVIndex instead of IndexReader.
// TODO: make IndexReader support accessing MVIndex directly.
return invalidTask, nil
}
if !candidate.path.IsSingleScan {
// If its parent requires a single read task, return max cost.
if prop.TaskTp == property.CopSingleReadTaskType {
Expand Down
168 changes: 166 additions & 2 deletions planner/core/indexmerge_path.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/pingcap/tidb/parser/model"
"github.com/pingcap/tidb/parser/mysql"
"github.com/pingcap/tidb/planner/util"
"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/util/logutil"
"github.com/pingcap/tidb/util/ranger"
"go.uber.org/zap"
Expand Down Expand Up @@ -63,7 +64,19 @@ func (ds *DataSource) generateIndexMergePath() error {
_, remaining := expression.PushDownExprs(stmtCtx, indexMergeConds, ds.ctx.GetClient(), kv.UnSpecified)
stmtCtx.SetWarnings(warnings)
stmtCtx.SetExtraWarnings(extraWarnings)
if len(remaining) != 0 {

remainingExpr := 0
for _, expr := range remaining {
// Handle these 3 functions specially since they can be used to access MVIndex.
if sf, ok := expr.(*expression.ScalarFunction); ok {
if sf.FuncName.L == ast.JSONMemberOf || sf.FuncName.L == ast.JSONOverlaps ||
sf.FuncName.L == ast.JSONContains {
continue
}
}
remainingExpr++
}
if remainingExpr > 0 {
needConsiderIndexMerge = false
}
}
Expand Down Expand Up @@ -435,8 +448,16 @@ func (ds *DataSource) generateAndPruneIndexMergePath(indexMergeConds []expressio
if indexMergeAndPath != nil {
ds.possibleAccessPaths = append(ds.possibleAccessPaths, indexMergeAndPath)
}
// 3. Generate possible IndexMerge paths for MVIndex.
mvIndexMergePath, err := ds.generateIndexMergeJSONMVIndexPath(regularPathCount, indexMergeConds)
if err != nil {
return err
}
if mvIndexMergePath != nil {
ds.possibleAccessPaths = append(ds.possibleAccessPaths, mvIndexMergePath...)
}

// 3. If needed, append a warning if no IndexMerge is generated.
// 4. If needed, append a warning if no IndexMerge is generated.

// If without hints, it means that `enableIndexMerge` is true
if len(ds.indexMergeHints) == 0 {
Expand Down Expand Up @@ -467,3 +488,146 @@ func (ds *DataSource) generateAndPruneIndexMergePath(indexMergeConds []expressio
}
return nil
}

// generateIndexMergeJSONMVIndexPath generates IndexMerge paths for
// (json_member_of / json_overlaps / json_contains) filters on multi-valued indexes.
//
// It scans the first normalPathCnt entries of ds.possibleAccessPaths, and for every
// single-column MVIndex path that is named in a `use_index_merge` hint it tries to match
// each filter against the JSON expression the index is built on. Every matching filter
// yields one IndexMerge path, built through buildIndexMergeOrPath.
//
// Only json_member_of with INT values is handled at present; json_overlaps and
// json_contains are recognised but skipped (see the TODOs below). The examples show the
// intended plan shapes once all three functions are supported.
//
// It returns the generated paths (nil when nothing matched) and any error raised while
// building the `virtual-column = value` condition or its ranges.
/*
	1. select * from t where 1 member of (a)
		IndexMerge(AND)
			IndexRangeScan(a, [1,1])
			TableRowIdScan(t)
	2. select * from t where json_contains(a, '[1, 2, 3]')
		IndexMerge(AND)
			IndexRangeScan(a, [1,1])
			IndexRangeScan(a, [2,2])
			IndexRangeScan(a, [3,3])
			TableRowIdScan(t)
	3. select * from t where json_overlaps(a, '[1, 2, 3]')
		IndexMerge(OR)
			IndexRangeScan(a, [1,1])
			IndexRangeScan(a, [2,2])
			IndexRangeScan(a, [3,3])
			TableRowIdScan(t)
*/
func (ds *DataSource) generateIndexMergeJSONMVIndexPath(normalPathCnt int, filters []expression.Expression) (mvIndexPaths []*util.AccessPath, err error) {
	for idx := 0; idx < normalPathCnt; idx++ {
		path := ds.possibleAccessPaths[idx]
		if path.IsTablePath() || path.Index == nil || !path.Index.MVIndex {
			continue // not a MVIndex path
		}
		if !ds.isSpecifiedInIndexMergeHints(path.Index.Name.L) {
			// For safety, only consider using MVIndex when there is a `use_index_merge` hint for now.
			// TODO: remove this limitation
			continue
		}

		// Step 1. Extract the underlying JSON column from MVIndex Info.
		mvIndex := path.Index
		if len(mvIndex.Columns) != 1 {
			// only support single-column MVIndex now: idx((cast(a->'$.zip' as signed array)))
			// TODO: support composite MVIndex idx((x, cast(a->'$.zip' as int array), z))
			continue
		}
		mvVirColOffset := mvIndex.Columns[0].Offset
		mvVirColMeta := ds.table.Meta().Cols()[mvVirColOffset]

		var virCol *expression.Column
		for _, ce := range ds.TblCols {
			if ce.ID == mvVirColMeta.ID {
				virCol = ce.Clone().(*expression.Column)
				virCol.RetType = ce.GetType().ArrayType() // use the underlying type directly: JSON-ARRAY(INT) --> INT
				break
			}
		}
		if virCol == nil {
			// No column in ds.TblCols carries the index column's ID; without this guard the
			// dereference below would panic. Skip the index rather than fail the whole plan.
			continue
		}
		// unwrap the outside cast: cast(json_extract(test.t.a, $.zip), JSON) --> json_extract(test.t.a, $.zip)
		targetJSONPath, ok := unwrapJSONCast(virCol.VirtualExpr)
		if !ok {
			continue
		}

		// Step 2. Iterate all filters and generate corresponding IndexMerge paths.
		for filterIdx, filter := range filters {
			// Step 2.1. Extract jsonPath and vals from json_member / json_overlaps / json_contains functions.
			sf, ok := filter.(*expression.ScalarFunction)
			if !ok {
				continue
			}

			var jsonPath expression.Expression
			var vals []expression.Expression
			switch sf.FuncName.L {
			case ast.JSONMemberOf: // (1 member of a->'$.zip')
				jsonPath = sf.GetArgs()[1]
				v, ok := unwrapJSONCast(sf.GetArgs()[0]) // cast(1 as json) --> 1
				if !ok {
					continue
				}
				vals = append(vals, v)
			case ast.JSONOverlaps: // (json_overlaps(a->'$.zip', '[1, 2, 3]')
				continue // TODO: support json_overlaps
			case ast.JSONContains: // (json_contains(a->'$.zip', '[1, 2, 3]')
				continue // TODO: support json_contains
			default:
				continue
			}

			// Step 2.2. Check some limitations.
			if jsonPath == nil || len(vals) == 0 {
				continue
			}
			if !jsonPath.Equal(ds.ctx, targetJSONPath) {
				continue // not on the same JSON col
			}
			// only support INT now
			// TODO: support more types
			// NOTE(review): this skips the filter when the JSON path expression itself
			// evaluates as INT, which reads oddly next to the "only support INT" remark;
			// behaviour is kept as-is — confirm the intended condition.
			if jsonPath.GetType().EvalType() == types.ETInt {
				continue
			}
			allInt := true
			// TODO: support using IndexLookUp to handle single-value cases.
			for _, v := range vals {
				if v.GetType().EvalType() != types.ETInt {
					allInt = false
					break // one non-INT value is enough to reject the filter
				}
			}
			if !allInt {
				continue
			}

			// Step 2.3. Generate a IndexMerge Path of this filter on the current MVIndex.
			partialPaths := make([]*util.AccessPath, 0, len(vals))
			for _, v := range vals {
				partialPath := &util.AccessPath{Index: mvIndex}
				partialPath.Ranges = ranger.FullRange()
				// TODO: get the actual column length of this virtual column
				partialPath.IdxCols, partialPath.IdxColLens = []*expression.Column{virCol}, []int{types.UnspecifiedLength}
				partialPath.FullIdxCols, partialPath.FullIdxColLens = []*expression.Column{virCol}, []int{types.UnspecifiedLength}

				// calculate the path range with the condition `a->'$.zip' = 1`.
				eq, err := expression.NewFunction(ds.ctx, ast.EQ, types.NewFieldType(mysql.TypeTiny), virCol, v)
				if err != nil {
					return nil, err
				}
				if err = ds.detachCondAndBuildRangeForPath(partialPath, []expression.Expression{eq}); err != nil {
					return nil, err
				}

				partialPaths = append(partialPaths, partialPath)
			}
			indexMergePath := ds.buildIndexMergeOrPath(filters, partialPaths, filterIdx)
			mvIndexPaths = append(mvIndexPaths, indexMergePath)
		}
	}
	return mvIndexPaths, nil
}

// unwrapJSONCast strips an outermost cast-to-JSON from expr.
// When expr is a scalar function named ast.Cast whose result evaluates as JSON,
// it returns that function's first argument and true, e.g.
// cast(1 as json) --> 1. For anything else (including a nil expr) it returns nil, false.
func unwrapJSONCast(expr expression.Expression) (expression.Expression, bool) {
	if expr == nil {
		return nil, false
	}
	castFn, isScalarFunc := expr.(*expression.ScalarFunction)
	if !isScalarFunc || castFn == nil {
		return nil, false
	}
	// Both conditions must hold: the function is a cast, and its target type is JSON.
	isJSONCast := castFn.FuncName.L == ast.Cast && castFn.GetType().EvalType() == types.ETJson
	if !isJSONCast {
		return nil, false
	}
	return castFn.GetArgs()[0], true
}
53 changes: 53 additions & 0 deletions planner/core/indexmerge_path_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Copyright 2022 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package core_test

import (
"testing"

"github.com/pingcap/tidb/planner/core"
"github.com/pingcap/tidb/testkit"
"github.com/pingcap/tidb/testkit/testdata"
)

// TestIndexMergeJSONMemberOf runs the suite's `member of` queries through
// `explain format = 'brief'` against a table with three multi-valued indexes
// and compares each plan with the recorded output.
func TestIndexMergeJSONMemberOf(t *testing.T) {
	kit := testkit.NewTestKit(t, testkit.CreateMockStore(t))
	kit.MustExec("use test")
	kit.MustExec(`create table t(
a int, j0 json, j1 json,
index j0_0((cast(j0->'$.path0' as signed array))),
index j0_1((cast(j0->'$.path1' as signed array))),
index j1((cast(j1 as signed array))))`)

	var (
		queries  []string
		expected []struct {
			SQL  string
			Plan []string
		}
	)
	suite := core.GetIndexMergeSuiteData()
	suite.LoadTestCases(t, &queries, &expected)

	for caseIdx := range queries {
		sql := queries[caseIdx]
		// The OnRecord callbacks refresh the stored SQL and plan
		// (presumably only when the suite runs in record mode — confirm in testdata).
		testdata.OnRecord(func() {
			expected[caseIdx].SQL = sql
		})
		rows := kit.MustQuery("explain format = 'brief' " + sql)
		testdata.OnRecord(func() {
			expected[caseIdx].Plan = testdata.ConvertRowsToStrings(rows.Rows())
		})
		rows.Check(testkit.Rows(expected[caseIdx].Plan...))
	}
}
3 changes: 3 additions & 0 deletions planner/core/logical_plan_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -4673,7 +4673,10 @@ func (b *PlanBuilder) buildDataSource(ctx context.Context, tn *ast.TableName, as
if i < len(columns) {
if columns[i].IsGenerated() && !columns[i].GeneratedStored {
var err error
originVal := b.allowBuildCastArray
b.allowBuildCastArray = true
expr, _, err = b.rewrite(ctx, columns[i].GeneratedExpr, ds, nil, true)
b.allowBuildCastArray = originVal
if err != nil {
return nil, err
}
Expand Down
10 changes: 10 additions & 0 deletions planner/core/testdata/index_merge_suite_in.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
[
{
"name": "TestIndexMergeJSONMemberOf",
"cases": [
"select /*+ use_index_merge(t, j0_0) */ * from t where (1 member of (j0->'$.path0'))",
"select /*+ use_index_merge(t, j0_1) */ * from t where (1 member of (j0->'$.path1')) and a<10",
"select /*+ use_index_merge(t, j0_1) */ * from t where (1 member of (j0->'$.XXX')) and a<10",
"select /*+ use_index_merge(t, j0_1) */ * from t where (1 member of (j0->'$.path1')) and (2 member of (j1)) and a<10",
"select /*+ use_index_merge(t, j1) */ * from t where (1 member of (j0->'$.path1')) and (2 member of (j1)) and a<10"
]
},
{
"name": "TestIndexMergePathGeneration",
"cases": [
Expand Down
53 changes: 53 additions & 0 deletions planner/core/testdata/index_merge_suite_out.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,57 @@
[
{
"Name": "TestIndexMergeJSONMemberOf",
"Cases": [
{
"SQL": "select /*+ use_index_merge(t, j0_0) */ * from t where (1 member of (j0->'$.path0'))",
"Plan": [
"Selection 0.00 root json_memberof(cast(1, json BINARY), json_extract(test.t.j0, \"$.path0\"))",
"└─IndexMerge 0.00 root type: union",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_0(cast(json_extract(`j0`, _utf8mb4'$.path0') as signed array)) range:[1,1], keep order:false, stats:pseudo",
" └─TableRowIDScan(Probe) 0.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ use_index_merge(t, j0_1) */ * from t where (1 member of (j0->'$.path1')) and a<10",
"Plan": [
"Selection 0.00 root json_memberof(cast(1, json BINARY), json_extract(test.t.j0, \"$.path1\"))",
"└─IndexMerge 0.00 root type: union",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_1(cast(json_extract(`j0`, _utf8mb4'$.path1') as signed array)) range:[1,1], keep order:false, stats:pseudo",
" └─Selection(Probe) 0.00 cop[tikv] lt(test.t.a, 10)",
" └─TableRowIDScan 0.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ use_index_merge(t, j0_1) */ * from t where (1 member of (j0->'$.XXX')) and a<10",
"Plan": [
"Selection 2658.67 root json_memberof(cast(1, json BINARY), json_extract(test.t.j0, \"$.XXX\"))",
"└─TableReader 3323.33 root data:Selection",
" └─Selection 3323.33 cop[tikv] lt(test.t.a, 10)",
" └─TableFullScan 10000.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ use_index_merge(t, j0_1) */ * from t where (1 member of (j0->'$.path1')) and (2 member of (j1)) and a<10",
"Plan": [
"Selection 0.00 root json_memberof(cast(1, json BINARY), json_extract(test.t.j0, \"$.path1\")), json_memberof(cast(2, json BINARY), test.t.j1)",
"└─IndexMerge 0.00 root type: union",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j0_1(cast(json_extract(`j0`, _utf8mb4'$.path1') as signed array)) range:[1,1], keep order:false, stats:pseudo",
" └─Selection(Probe) 0.00 cop[tikv] lt(test.t.a, 10)",
" └─TableRowIDScan 0.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
},
{
"SQL": "select /*+ use_index_merge(t, j1) */ * from t where (1 member of (j0->'$.path1')) and (2 member of (j1)) and a<10",
"Plan": [
"Selection 0.00 root json_memberof(cast(1, json BINARY), json_extract(test.t.j0, \"$.path1\")), json_memberof(cast(2, json BINARY), test.t.j1)",
"└─IndexMerge 0.00 root type: union",
" ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:j1(cast(`j1` as signed array)) range:[2,2], keep order:false, stats:pseudo",
" └─Selection(Probe) 0.00 cop[tikv] lt(test.t.a, 10)",
" └─TableRowIDScan 0.00 cop[tikv] table:t keep order:false, stats:pseudo"
]
}
]
},
{
"Name": "TestIndexMergePathGeneration",
"Cases": [
Expand Down

0 comments on commit f9af75f

Please sign in to comment.