Low key not set for the index lookup on the inner relation of index nested loops join #1232

linmagit · 2020-10-12T18:36:20Z

Bug Report

Summary

When we run this TATP query, we do not generate the low key for the index lookup on the inner relation but only the high key. Thus, we scan much more tuples than necessary. This only occurs for the index nested loops join in TATP but not TPCC. It may be related to the fact that we're doing a range index scan on the inner relation in TATP but only a point lookup in TPCC. I also checked that this is independent of the protocol (both the simple and extended protocol have this problem).

Environment

I don't think the environment affects the behavior. I used g++ 7.5.0 on Ubuntu 18.04

Steps to Reproduce

(optional) Add ast::AstPrettyPrint::Dump(std::cerr, root_); here so that you can see the generated TPL program.
Compile with the following args: -DCMAKE_BUILD_TYPE=Release
Run terrier binary
Run TATP through OLTPBench against terrier.
The TPL program generated for the above-mentioned index nested loops join is as following:

struct OutputStruct {
    out0: StringVal
}
struct QueryState {
    execCtx: *ExecutionContext
}
struct P1_State {
    num_scans_index : uint32
    index_size      : uint32
    num_scans_index1: uint32
    num_loops       : uint32
    output_buffer   : *OutputBuffer
    num_output      : uint32
    execFeatures    : ExecOUFeatureVector
}
fun Query24_Init(queryState: *QueryState) -> nil {
    return
}

fun Query24_Pipeline1_InitPipelineState(queryState: *QueryState, pipelineState: *P1_State) -> nil {
    pipelineState.output_buffer = @resultBufferNew(queryState.execCtx)
    pipelineState.num_output = 0
    pipelineState.index_size = 0
    pipelineState.num_scans_index1 = 0
    pipelineState.num_loops = 0
    pipelineState.num_scans_index = 0
    return
}

fun Query24_Pipeline1_TearDownPipelineState(queryState: *QueryState, pipelineState: *P1_State) -> nil {
    @resultBufferFree(pipelineState.output_buffer)
    @execOUFeatureVectorReset(&pipelineState.execFeatures)
    return
}

fun Query24_Pipeline1_SerialWork(queryState: *QueryState, pipelineState: *P1_State) -> nil {
    var col_oids1: [3]uint32
    col_oids1[0] = 3
    col_oids1[1] = 1
    col_oids1[2] = 2
    var index_iter1: IndexIterator
    @indexIteratorInit(&index_iter1, queryState.execCtx, 2, 1008, 1009, col_oids1)
    var index_pr = @indexIteratorGetPR(&index_iter1)
    @prSetInt(index_pr, 0, @getParamInt(queryState.execCtx, 0))
    @prSetTinyInt(index_pr, 1, @getParamTinyInt(queryState.execCtx, 1))
    for (@indexIteratorScanKey(&index_iter1); @indexIteratorAdvance(&index_iter1); ) {
        var table_pr1 = @indexIteratorGetTablePR(&index_iter1)
        var slot1 = @indexIteratorGetSlot(&index_iter1)
        if (SqlBoolToBool(@prGetInt(table_pr1, 0) == @getParamInt(queryState.execCtx, 0)) and SqlBoolToBool(@prGetTinyInt(table_pr1, 1) == @getParamTinyInt(queryState.execCtx, 1)) and SqlBoolToBool(@prGetTinyInt(table_pr1, 2) == @intToSql(1))) {
            var col_oids: [5]uint32
            col_oids[0] = 5
            col_oids[1] = 2
            col_oids[2] = 1
            col_oids[3] = 4
            col_oids[4] = 3
            var index_iter: IndexIterator
            @indexIteratorInit(&index_iter, queryState.execCtx, 3, 1010, 1011, col_oids)
            var lo_index_pr = @indexIteratorGetLoPR(&index_iter)
            var hi_index_pr = @indexIteratorGetHiPR(&index_iter)
            @prSetTinyInt(hi_index_pr, 1, @prGetTinyInt(table_pr1, 1))
            @prSetTinyInt(hi_index_pr, 2, @getParamTinyInt(queryState.execCtx, 2))
            @prSetInt(hi_index_pr, 0, @prGetInt(table_pr1, 0))
            pipelineState.num_loops = pipelineState.num_loops + IntegralCast(1)
            for (@indexIteratorScanAscending(&index_iter, 1, 0); @indexIteratorAdvance(&index_iter); ) {
                var table_pr = @indexIteratorGetTablePR(&index_iter)
                var slot = @indexIteratorGetSlot(&index_iter)
                if (SqlBoolToBool(@prGetTinyInt(table_pr, 3) <= @getParamTinyInt(queryState.execCtx, 2)) and SqlBoolToBool(@prGetTinyIntNull(table_pr, 4) > @getParamTinyInt(queryState.execCtx, 3)) and SqlBoolToBool(@prGetInt(table_pr, 1) == @prGetInt(table_pr1, 0)) and SqlBoolToBool(@prGetTinyInt(table_pr, 2) == @prGetTinyInt(table_pr1, 1))) {
                    var outRow = @ptrCast(*OutputStruct, @resultBufferAllocRow(pipelineState.output_buffer))
                    outRow.out0 = @prGetVarlenNull(table_pr, 0)
                    pipelineState.num_output = pipelineState.num_output + IntegralCast(1)
                }
                pipelineState.num_scans_index1 = pipelineState.num_scans_index1 + IntegralCast(1)
            }
            pipelineState.index_size = @indexIteratorGetSize(&index_iter)
            @indexIteratorFree(&index_iter)
        }
        pipelineState.num_scans_index = pipelineState.num_scans_index + IntegralCast(1)
    }
    @indexIteratorFree(&index_iter1)
    @execOUFeatureVectorRecordFeature(&pipelineState.execFeatures, 1, 10028, 0, 0, @indexIteratorGetSize(&index_iter1))
    @execOUFeatureVectorRecordFeature(&pipelineState.execFeatures, 1, 10028, 1, 0, pipelineState.num_scans_index)
    @execOUFeatureVectorRecordFeature(&pipelineState.execFeatures, 1, 10027, 0, 0, pipelineState.num_scans_index)
    @execOUFeatureVectorRecordFeature(&pipelineState.execFeatures, 1, 10027, 1, 0, pipelineState.num_scans_index)
    @execOUFeatureVectorRecordFeature(&pipelineState.execFeatures, 1, 10026, 0, 0, pipelineState.num_scans_index)
    @execOUFeatureVectorRecordFeature(&pipelineState.execFeatures, 1, 10026, 1, 0, pipelineState.num_scans_index)
    return
}

fun Query24_Pipeline1_Init(queryState: *QueryState) -> nil {
    var threadStateContainer = @execCtxGetTLS(queryState.execCtx)
    @tlsReset(threadStateContainer, @sizeOf(P1_State), Query24_Pipeline1_InitPipelineState, Query24_Pipeline1_TearDownPipelineState, queryState)
    return
}

fun Query24_Pipeline1_Run(queryState: *QueryState) -> nil {
    var pipelineState = @ptrCast(*P1_State, @tlsGetCurrentThreadState(@execCtxGetTLS(queryState.execCtx)))
    @execOUFeatureVectorInit(queryState.execCtx, &pipelineState.execFeatures, 1, false)
    @registerMetricsThread(queryState.execCtx)
    @execCtxStartPipelineTracker(queryState.execCtx, 1)
    Query24_Pipeline1_SerialWork(queryState, pipelineState)
    var per_loop_num_scans = pipelineState.num_scans_index1
    if (pipelineState.num_loops > 0) {
        per_loop_num_scans = per_loop_num_scans / pipelineState.num_loops
    }
    @execOUFeatureVectorRecordFeature(&pipelineState.execFeatures, 1, 10025, 1, 0, per_loop_num_scans)
    @execOUFeatureVectorRecordFeature(&pipelineState.execFeatures, 1, 10025, 0, 0, pipelineState.index_size)
    @execOUFeatureVectorRecordFeature(&pipelineState.execFeatures, 1, 10025, 2, 0, pipelineState.num_loops)
    @resultBufferFinalize(pipelineState.output_buffer)
    @execOUFeatureVectorRecordFeature(&pipelineState.execFeatures, 1, 10024, 0, 0, pipelineState.num_output)
    @execCtxEndPipelineTracker(queryState.execCtx, 24, 1, &pipelineState.execFeatures)
    @execOUFeatureVectorReset(&pipelineState.execFeatures)
    return
}

fun Query24_Pipeline1_TearDown(queryState: *QueryState) -> nil {
    @tlsClear(@execCtxGetTLS(queryState.execCtx))
    @checkTrackersStopped(queryState.execCtx)
    return
}

fun Query24_TearDown(queryState: *QueryState) -> nil {
    return
}

The low keys are not set in function Query24_Pipeline1_SerialWork.

The text was updated successfully, but these errors were encountered:

thepinetree · 2020-10-13T20:02:21Z

What we do is choose an index type based on open highs and open lows. In particular we choose between:
Exact,
AscendingClosed,
AscendingOpenHigh,
AscendingOpenLow,
AscendingOpenBoth,

(Idt these two are implemented)
Descending,
DescendingLimit

If the number of open highs equal the number of open lows and they are both set for each oid then we choose a closed scan (and an exact scan if it is a single tuple). Otherwise if a lower bound exists for an oid and upper bound does not, then we choose an ascending open high, and if an upper bound exists for an old and the lower bound does not, then we choose an ascending open low.

I suspect then that in the index implementation when an openhigh/openlow is set it doesn’t use anything else except the respective bounds it uses.

thepinetree · 2020-10-13T20:02:52Z

@abhijithanilkumar can you take a look at the index implementations and see whether it uses the bounds correctly?

thepinetree · 2020-10-25T10:52:46Z

I've determined that an OpenHigh index scan only uses lo column bounds, and an OpenLow index scan only uses hi column bounds in plan_generator.cpp here. It remains to be seen whether the index implementations would be amenable to adding all bounds regardless of the index scan type and being better able to move around the index accordingly with some partial key lookup. @abhijithanilkumar let me know if that makes sense.

linmagit added bug Something isn't working (correctness). Mark issues with this. performance Performance related issues or changes. labels Oct 12, 2020

apavlo assigned abhijithanilkumar and thepinetree and unassigned abhijithanilkumar Oct 13, 2020

thepinetree assigned abhijithanilkumar Oct 13, 2020

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Low key not set for the index lookup on the inner relation of index nested loops join #1232

Low key not set for the index lookup on the inner relation of index nested loops join #1232

linmagit commented Oct 12, 2020

thepinetree commented Oct 13, 2020

thepinetree commented Oct 13, 2020

thepinetree commented Oct 25, 2020 •

edited

Loading

Low key not set for the index lookup on the inner relation of index nested loops join #1232

Low key not set for the index lookup on the inner relation of index nested loops join #1232

Comments

linmagit commented Oct 12, 2020

Bug Report

Summary

Environment

Steps to Reproduce

thepinetree commented Oct 13, 2020

thepinetree commented Oct 13, 2020

thepinetree commented Oct 25, 2020 • edited Loading

thepinetree commented Oct 25, 2020 •

edited

Loading