Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce amount of expression objects created during evaluations #15552

Merged
merged 15 commits into from
Dec 15, 2023
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.query.filter.AndDimFilter;
import org.apache.druid.query.filter.DimFilter;
Expand Down Expand Up @@ -70,6 +71,7 @@ public class ExpressionFilterBenchmark
{
static {
NullHandling.initializeForTests();
ExpressionProcessing.initializeForTests();
}

@Param({"1000000"})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.query.dimension.ExtractionDimensionSpec;
import org.apache.druid.query.expression.LookupEnabledTestExprMacroTable;
import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.query.extraction.StrlenExtractionFn;
import org.apache.druid.query.extraction.TimeFormatExtractionFn;
Expand Down Expand Up @@ -59,21 +61,21 @@
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;

import java.util.BitSet;
import java.util.List;
import java.util.concurrent.TimeUnit;

@State(Scope.Benchmark)
@Fork(value = 1)
@Warmup(iterations = 15)
@Measurement(iterations = 30)
@Warmup(iterations = 3, time = 3)
@Measurement(iterations = 10, time = 3)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
public class ExpressionSelectorBenchmark
{
static {
NullHandling.initializeForTests();
ExpressionProcessing.initializeForTests();
}

@Param({"1000000"})
Expand Down Expand Up @@ -451,6 +453,154 @@ public void stringConcatAndCompareOnLong(Blackhole blackhole)
blackhole.consume(results);
}

@Benchmark
public void caseSearched1(Blackhole blackhole)
{
final Sequence<Cursor> cursors = new QueryableIndexStorageAdapter(index).makeCursors(
null,
index.getDataInterval(),
VirtualColumns.create(
ImmutableList.of(
new ExpressionVirtualColumn(
"v",
"case_searched(s == 'asd' || isnull(s) || s == 'xxx', 1, s == 'foo' || s == 'bar', 2, 3)",
ColumnType.LONG,
TestExprMacroTable.INSTANCE
)
)
),
Granularities.ALL,
false,
null
);

final List<?> results = cursors
.map(cursor -> {
final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
consumeLong(cursor, selector, blackhole);
return null;
})
.toList();

blackhole.consume(results);
}

@Benchmark
public void caseSearched2(Blackhole blackhole)
{
final Sequence<Cursor> cursors = new QueryableIndexStorageAdapter(index).makeCursors(
null,
index.getDataInterval(),
VirtualColumns.create(
ImmutableList.of(
new ExpressionVirtualColumn(
"v",
"case_searched(s == 'asd' || isnull(s) || n == 1, 1, n == 2, 2, 3)",
ColumnType.LONG,
TestExprMacroTable.INSTANCE
)
)
),
Granularities.ALL,
false,
null
);

final List<?> results = cursors
.map(cursor -> {
final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
consumeLong(cursor, selector, blackhole);
return null;
})
.toList();

blackhole.consume(results);
}

@Benchmark
public void caseSearchedWithLookup(Blackhole blackhole)
{
final Sequence<Cursor> cursors = new QueryableIndexStorageAdapter(index).makeCursors(
null,
index.getDataInterval(),
VirtualColumns.create(
ImmutableList.of(
new ExpressionVirtualColumn(
"v",
"case_searched(n == 1001, -1, "
+ "lookup(s, 'lookyloo') == 'asd1', 1, "
+ "lookup(s, 'lookyloo') == 'asd2', 2, "
+ "lookup(s, 'lookyloo') == 'asd3', 3, "
+ "lookup(s, 'lookyloo') == 'asd4', 4, "
+ "lookup(s, 'lookyloo') == 'asd5', 5, "
+ "-2)",
ColumnType.LONG,
LookupEnabledTestExprMacroTable.INSTANCE
)
)
),
Granularities.ALL,
false,
null
);

final List<?> results = cursors
.map(cursor -> {
final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
consumeLong(cursor, selector, blackhole);
return null;
})
.toList();

blackhole.consume(results);
}

@Benchmark
public void caseSearchedWithLookup2(Blackhole blackhole)
{
final Sequence<Cursor> cursors = new QueryableIndexStorageAdapter(index).makeCursors(
null,
index.getDataInterval(),
VirtualColumns.create(
ImmutableList.of(
new ExpressionVirtualColumn(
"ll",
"lookup(s, 'lookyloo')",
ColumnType.STRING,
LookupEnabledTestExprMacroTable.INSTANCE
),
new ExpressionVirtualColumn(
"v",
"case_searched(n == 1001, -1, "
+ "ll == 'asd1', 1, "
+ "ll == 'asd2', 2, "
+ "ll == 'asd3', 3, "
+ "ll == 'asd4', 4, "
+ "ll == 'asd5', 5, "
+ "-2)",
ColumnType.LONG,
LookupEnabledTestExprMacroTable.INSTANCE
)
)
),
Granularities.ALL,
false,
null
);

final List<?> results = cursors
.map(cursor -> {
final ColumnValueSelector selector = cursor.getColumnSelectorFactory().makeColumnValueSelector("v");
consumeLong(cursor, selector, blackhole);
return null;
})
.toList();

blackhole.consume(results);
}



private void consumeDimension(final Cursor cursor, final DimensionSelector selector, final Blackhole blackhole)
{
if (selector.getValueCardinality() >= 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,12 @@ public int hashCode()

class LongExpr extends ConstantExpr<Long>
{
final ExprEval expr;
kgyrtkirk marked this conversation as resolved.
Show resolved Hide resolved

LongExpr(Long value)
{
super(ExpressionType.LONG, Preconditions.checkNotNull(value, "value"));
expr = ExprEval.ofLong(value);
}

@Override
Expand All @@ -171,7 +174,7 @@ public String toString()
@Override
public ExprEval eval(ObjectBinding bindings)
{
return ExprEval.ofLong(value);
return expr;
}

@Override
Expand Down Expand Up @@ -240,9 +243,12 @@ public String toString()

class DoubleExpr extends ConstantExpr<Double>
{
final ExprEval expr;

DoubleExpr(Double value)
{
super(ExpressionType.DOUBLE, Preconditions.checkNotNull(value, "value"));
expr = ExprEval.ofDouble(value);
}

@Override
Expand All @@ -254,7 +260,7 @@ public String toString()
@Override
public ExprEval eval(ObjectBinding bindings)
{
return ExprEval.ofDouble(value);
return expr;
}

@Override
Expand Down Expand Up @@ -323,9 +329,12 @@ public String toString()

class StringExpr extends ConstantExpr<String>
{
final ExprEval expr;

StringExpr(@Nullable String value)
{
super(ExpressionType.STRING, NullHandling.emptyToNullIfNeeded(value));
expr = ExprEval.of(value);
}

@Override
Expand All @@ -337,7 +346,7 @@ public String toString()
@Override
public ExprEval eval(ObjectBinding bindings)
{
return ExprEval.of(value);
return expr;
}

@Override
Expand Down Expand Up @@ -464,15 +473,18 @@ public String toString()

class ComplexExpr extends ConstantExpr<Object>
{
final ExprEval expr;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Curious since we are evaluating this early, does the equals and hash method need to honor this ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

its not included in equals / hashCode as its a computed value from the value constructor argument;

as a matter of fact... ConstantExpr also has an outputType field which is also ignored in the equals / hashCode stuff - as that is passed in as a constructor parameter - actually this could probably be refactored to remove that field; however I'm not sure what benefits that would give...

the other way around could be to build the ContantExpr on top of an ExprEval - and use ExprEval.value() to get the inner value - but that approach would not like how the current BigIntegerExpr works...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure how often Expr are used in equals/hashcode, my gut says not often since the thing that makes them also usually has the original string which was parsed into Expr and is likely cheaper to just use the string


protected ComplexExpr(ExpressionType outputType, @Nullable Object value)
{
super(outputType, value);
expr = ExprEval.ofComplex(outputType, value);
}

@Override
public ExprEval eval(ObjectBinding bindings)
{
return ExprEval.ofComplex(outputType, value);
return expr;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ public static ExprEval ofBoolean(boolean value, ExprType type)
case DOUBLE:
return ExprEval.of(Evals.asDouble(value));
case LONG:
return ExprEval.of(Evals.asLong(value));
return ofLongBoolean(value);
case STRING:
return ExprEval.of(String.valueOf(value));
default:
Expand All @@ -376,7 +376,7 @@ public static ExprEval ofBoolean(boolean value, ExprType type)
*/
public static ExprEval ofLongBoolean(boolean value)
{
return ExprEval.of(Evals.asLong(value));
return value ? LongExprEval.TRUE : LongExprEval.FALSE;
}

public static ExprEval ofComplex(ExpressionType outputType, @Nullable Object value)
Expand Down Expand Up @@ -922,6 +922,8 @@ public Expr toExpr()

private static class LongExprEval extends NumericExprEval
{
private static final LongExprEval TRUE = new LongExprEval(Evals.asLong(true));
private static final LongExprEval FALSE = new LongExprEval(Evals.asLong(false));
private static final LongExprEval OF_NULL = new LongExprEval(null);

private LongExprEval(@Nullable Number value)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ public static ExpressionType autoDetect(ExprEval eval, ExprEval otherEval)
{
ExpressionType type = eval.type();
ExpressionType otherType = otherEval.type();
if (type == otherType && eval.value() != null && otherEval.value() != null) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why does this need more than type == otherType? Later on we do

    type = eval.value() != null ? type : otherType;
    otherType = otherEval.value() != null ? otherType : type;

so if the value is null then it just takes on the other type, so i think being null or not here doesn't matter, because if the value is null and the same type then it is fine

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure - I was too cautious...if it will be null ; the type will be String as per the comment above the method - so it will be only == if the other is also a String

In this mode, null values are {@link ExprType#STRING} typed, despite
potentially coming from an underlying numeric column, or when an underlying column was completely missing and so all values are null

regarding those lines:
if eval.value() == null ; after the 1st line type = otherType ; independently from the the 2nd line that will mean that effectively numeric(otherType, otherType) will be called...

if(eval.value() == null) {
  return otherType;
}
if(otherEval.value() == null) {
  return type;
}
return numeric(type, otherType);

which made me wonder... can ExpressionType.getType() == COMPLEX for this method? if it could this change could alter its behaviour - as earlier DOUBLE was returned in that case

to avoid changing that behaviour - for now I've changed it to:

    if (type == otherType && type.getType().isPrimitive()) {
      return type;
    }

should these pass / fail / or shouldn't exists at all?

    final ExprEval<?> complexEval = ExprEval.ofComplex(ExpressionType.UNKNOWN_COMPLEX, new Object());
    Assert.assertEquals(ExpressionType.DOUBLE, ExpressionTypeConversion.autoDetect(longEval, complexEval));
    Assert.assertEquals(ExpressionType.DOUBLE, ExpressionTypeConversion.autoDetect(doubleEval, complexEval));
    Assert.assertEquals(ExpressionType.DOUBLE, ExpressionTypeConversion.autoDetect(arrayEval, complexEval));
    Assert.assertEquals(ExpressionType.DOUBLE, ExpressionTypeConversion.autoDetect(complexEval, complexEval));

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good question, so this function predates expressions supporting complex types, and shouldn't be used by anything that handles arrays or complex types.

additionally, the comment about nulls always being strings isn't quite as true as it once was, null values coming from columns use the correctly typed null constants, so its mostly null constants and nulls created without additional type information (bestEffortOf) which are string types.

I guess autoDetect should be an invalid exception if it is trying to process complex types, but autoDetect is also only used inside of eval methods as far as I can tell, Expr.getOutputType all use other methods in this class like function and operator which do handle complex and array types and ensure that they are compatible, so I'm not sure it proves to be much of a problem in practice.

return type;
}
if (Types.is(type, ExprType.STRING) && Types.is(otherType, ExprType.STRING)) {
return ExpressionType.STRING;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ public void testEqualsContractForUnaryMinusExpr()
public void testEqualsContractForStringExpr()
{
EqualsVerifier.forClass(StringExpr.class)
.withIgnoredFields("outputType")
.withIgnoredFields("outputType", "expr")
.withPrefabValues(ExpressionType.class, ExpressionType.STRING, ExpressionType.DOUBLE)
.usingGetClass()
.verify();
Expand All @@ -149,7 +149,7 @@ public void testEqualsContractForStringExpr()
public void testEqualsContractForDoubleExpr()
{
EqualsVerifier.forClass(DoubleExpr.class)
.withIgnoredFields("outputType")
.withIgnoredFields("outputType", "expr")
.withPrefabValues(ExpressionType.class, ExpressionType.DOUBLE, ExpressionType.LONG)
.usingGetClass()
.verify();
Expand All @@ -159,7 +159,7 @@ public void testEqualsContractForDoubleExpr()
public void testEqualsContractForLongExpr()
{
EqualsVerifier.forClass(LongExpr.class)
.withIgnoredFields("outputType")
.withIgnoredFields("outputType", "expr")
.withPrefabValues(ExpressionType.class, ExpressionType.LONG, ExpressionType.STRING)
.usingGetClass()
.verify();
Expand Down Expand Up @@ -187,6 +187,7 @@ public void testEqualsContractForComplexExpr()
ExpressionTypeFactory.getInstance().ofComplex("bar")
)
.withNonnullFields("outputType")
.withIgnoredFields("expr")
.usingGetClass()
.verify();
}
Expand Down
Loading