Skip to content

Commit

Permalink
KYLIN-3352 better filter transform for better seg pruning
Browse files Browse the repository at this point in the history
  • Loading branch information
liyang-kylin authored and shaofengshi committed May 27, 2018
1 parent bbb71a8 commit 1a1c8a9
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 30 deletions.
28 changes: 21 additions & 7 deletions core-cube/src/main/java/org/apache/kylin/gridtable/GTUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import org.apache.kylin.metadata.filter.FilterOptimizeTransformer;
import org.apache.kylin.metadata.filter.IFilterCodeSystem;
import org.apache.kylin.metadata.filter.TupleFilter;
import org.apache.kylin.metadata.filter.TupleFilter.FilterOperatorEnum;
import org.apache.kylin.metadata.filter.TupleFilterSerializer;
import org.apache.kylin.metadata.model.TableDesc;
import org.apache.kylin.metadata.model.TblColRef;
Expand Down Expand Up @@ -154,8 +155,7 @@ public TupleFilter onSerialize(TupleFilter filter) {
// In case of NOT(unEvaluatableFilter), we should immediately replace it with TRUE.
// Otherwise, unEvaluatableFilter will later be replaced with TRUE, and NOT(unEvaluatableFilter)
// will always return FALSE.
if (filter.getOperator() == TupleFilter.FilterOperatorEnum.NOT
&& !TupleFilter.isEvaluableRecursively(filter)) {
if (filter.getOperator() == FilterOperatorEnum.NOT && !TupleFilter.isEvaluableRecursively(filter)) {
TupleFilter.collectColumns(filter, unevaluatableColumnCollector);
return ConstantTupleFilter.TRUE;
}
Expand All @@ -181,7 +181,6 @@ public TupleFilter onSerialize(TupleFilter filter) {
return filter;
}

@SuppressWarnings({ "rawtypes", "unchecked" })
protected TupleFilter encodeConstants(CompareTupleFilter oldCompareFilter) {
// extract ColumnFilter & ConstantFilter
TblColRef externalCol = oldCompareFilter.getColumn();
Expand Down Expand Up @@ -249,9 +248,13 @@ protected TupleFilter encodeConstants(CompareTupleFilter oldCompareFilter) {
}
break;
case LT:
code = translate(col, firstValue, 1);
code = translate(col, firstValue, 0);
if (code == null) {
result = ConstantTupleFilter.TRUE;
code = translate(col, firstValue, -1);
if (code == null)
result = ConstantTupleFilter.FALSE;
else
result = newCompareFilter(FilterOperatorEnum.LTE, externalCol, code);
} else {
newCompareFilter.addChild(new ConstantTupleFilter(code));
result = newCompareFilter;
Expand All @@ -267,9 +270,13 @@ protected TupleFilter encodeConstants(CompareTupleFilter oldCompareFilter) {
}
break;
case GT:
code = translate(col, firstValue, -1);
code = translate(col, firstValue, 0);
if (code == null) {
result = ConstantTupleFilter.TRUE;
code = translate(col, firstValue, 1);
if (code == null)
result = ConstantTupleFilter.FALSE;
else
result = newCompareFilter(FilterOperatorEnum.GTE, externalCol, code);
} else {
newCompareFilter.addChild(new ConstantTupleFilter(code));
result = newCompareFilter;
Expand All @@ -290,6 +297,13 @@ protected TupleFilter encodeConstants(CompareTupleFilter oldCompareFilter) {
return result;
}

/**
 * Assembles a two-operand comparison filter of the form {@code col <op> code}.
 *
 * @param op   comparison operator for the new filter
 * @param col  column reference wrapped in a {@link ColumnTupleFilter} and added as the first child
 * @param code constant wrapped in a {@link ConstantTupleFilter} and added as the second child
 * @return a new {@link CompareTupleFilter} holding the column child followed by the constant child
 */
private TupleFilter newCompareFilter(FilterOperatorEnum op, TblColRef col, ByteArray code) {
    CompareTupleFilter compare = new CompareTupleFilter(op);

    // Children are attached in this order: column operand first, then the constant operand.
    ColumnTupleFilter columnOperand = new ColumnTupleFilter(col);
    compare.addChild(columnOperand);

    ConstantTupleFilter constantOperand = new ConstantTupleFilter(code);
    compare.addChild(constantOperand);

    return compare;
}

transient ByteBuffer buf;

protected ByteArray translate(int col, Object value, int roundingFlag) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,6 @@ public String toString() {

// TODO: needs to be generalized; currently only evaluates COLUMN {op} CONST
@Override
@SuppressWarnings({ "unchecked", "rawtypes" })
public boolean evaluate(IEvaluatableTuple tuple, IFilterCodeSystem cs) {
// extract tuple value
Object tupleValue = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
import org.apache.kylin.common.util.LocalFileMetadataTestCase;
import org.apache.kylin.common.util.Pair;
import org.apache.kylin.cube.gridtable.CubeCodeSystem;
import org.apache.kylin.dict.NumberDictionaryBuilder;
import org.apache.kylin.dict.NumberDictionaryForestBuilder;
import org.apache.kylin.dict.StringBytesConverter;
import org.apache.kylin.dict.TrieDictionaryBuilder;
import org.apache.kylin.dimension.DictionaryDimEnc;
Expand Down Expand Up @@ -441,19 +441,61 @@ public void verifyConvertFilterConstants2() {
TblColRef extColA = TblColRef.mockup(extTable, 1, "A", "timestamp");
TblColRef extColB = TblColRef.mockup(extTable, 2, "B", "integer");

CompareTupleFilter fComp1 = compare(extColA, FilterOperatorEnum.GT, "2015-01-14");
CompareTupleFilter fComp2 = compare(extColB, FilterOperatorEnum.LT, "9");
LogicalTupleFilter filter = and(fComp1, fComp2);

List<TblColRef> colMapping = Lists.newArrayList();
colMapping.add(extColA);
colMapping.add(extColB);

CompareTupleFilter fComp1 = compare(extColA, FilterOperatorEnum.GT, "2015-01-14");

// $1<"9" round down to FALSE
{
LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.LT, "9"));
TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
assertEquals(ConstantTupleFilter.FALSE, newFilter);
}

// $1<"9" round up to $1<"10"
TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
assertEquals(
"AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 LT [\\x00]]",
newFilter.toString());
// $1<"10" needs no rounding
{
LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.LT, "10"));
TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
assertEquals(
"AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 LT [\\x00]]",
newFilter.toString());
}

// $1<"11" round down to <="10"
{
LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.LT, "11"));
TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
assertEquals(
"AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 LTE [\\x00]]",
newFilter.toString());
}

// $1<="9" round down to FALSE
{
LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.LTE, "9"));
TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
assertEquals(ConstantTupleFilter.FALSE, newFilter);
}

// $1<="10" needs no rounding
{
LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.LTE, "10"));
TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
assertEquals(
"AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 LTE [\\x00]]",
newFilter.toString());
}

// $1<="11" round down to <="10"
{
LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.LTE, "11"));
TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
assertEquals(
"AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 LTE [\\x00]]",
newFilter.toString());
}
}

@Test
Expand All @@ -464,17 +506,61 @@ public void verifyConvertFilterConstants3() {
TblColRef extColA = TblColRef.mockup(extTable, 1, "A", "timestamp");
TblColRef extColB = TblColRef.mockup(extTable, 2, "B", "integer");

CompareTupleFilter fComp1 = compare(extColA, FilterOperatorEnum.GT, "2015-01-14");
CompareTupleFilter fComp2 = compare(extColB, FilterOperatorEnum.LTE, "9");
LogicalTupleFilter filter = and(fComp1, fComp2);

List<TblColRef> colMapping = Lists.newArrayList();
colMapping.add(extColA);
colMapping.add(extColB);

CompareTupleFilter fComp1 = compare(extColA, FilterOperatorEnum.GT, "2015-01-14");

// $1>"101" round up to FALSE
{
LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.GT, "101"));
TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
assertEquals(ConstantTupleFilter.FALSE, newFilter);
}

// $1<="9" round down to FALSE
TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
assertEquals(ConstantTupleFilter.FALSE, newFilter);
// $1>"100" needs no rounding
{
LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.GT, "100"));
TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
assertEquals(
"AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 GT [\\x09]]",
newFilter.toString());
}

// $1>"99" round up to >="100"
{
LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.GT, "99"));
TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
assertEquals(
"AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 GTE [\\x09]]",
newFilter.toString());
}

// $1>="101" round up to FALSE
{
LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.GTE, "101"));
TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
assertEquals(ConstantTupleFilter.FALSE, newFilter);
}

// $1>="100" needs no rounding
{
LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.GTE, "100"));
TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
assertEquals(
"AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 GTE [\\x09]]",
newFilter.toString());
}

// $1>="99" round up to >="100"
{
LogicalTupleFilter filter = and(fComp1, compare(extColB, FilterOperatorEnum.GTE, "99"));
TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null);
assertEquals(
"AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 GTE [\\x09]]",
newFilter.toString());
}
}

@Test
Expand Down Expand Up @@ -639,15 +725,13 @@ static GTInfo newInfo() {
return info;
}

@SuppressWarnings("unchecked")
/**
 * Creates the code system used by the dictionary-based tests: a three-slot encoding array in which
 * slot 1 is a dictionary encoding over {@link #newDictionaryOfInteger()} and slot 2 is a dictionary
 * encoding over {@link #newDictionaryOfString()}.
 *
 * @return a {@link CubeCodeSystem} built from those dimension encodings
 */
private static CubeCodeSystem newDictCodeSystem() {
    DimensionEncoding integerEnc = new DictionaryDimEnc(newDictionaryOfInteger());
    DimensionEncoding stringEnc = new DictionaryDimEnc(newDictionaryOfString());

    // Slot 0 stays null, exactly as in a freshly allocated array of length 3.
    DimensionEncoding[] encodings = { null, integerEnc, stringEnc };
    return new CubeCodeSystem(encodings);
}

@SuppressWarnings("rawtypes")
private static Dictionary newDictionaryOfString() {
TrieDictionaryBuilder<String> builder = new TrieDictionaryBuilder<>(new StringBytesConverter());
builder.addValue("Dong");
Expand All @@ -663,9 +747,8 @@ private static Dictionary newDictionaryOfString() {
return builder.build(0);
}

@SuppressWarnings("rawtypes")
private static Dictionary newDictionaryOfInteger() {
NumberDictionaryBuilder builder = new NumberDictionaryBuilder();
NumberDictionaryForestBuilder builder = new NumberDictionaryForestBuilder();
builder.addValue("10");
builder.addValue("20");
builder.addValue("30");
Expand All @@ -676,7 +759,7 @@ private static Dictionary newDictionaryOfInteger() {
builder.addValue("80");
builder.addValue("90");
builder.addValue("100");
return builder.build(0);
return builder.build();
}

public static ImmutableBitSet setOf(int... values) {
Expand Down

0 comments on commit 1a1c8a9

Please sign in to comment.