Skip to content

Commit

Permalink
[fix](Nereids) adjust min/max stats for cast function if types are comparable (apache#28166)
Browse files Browse the repository at this point in the history

estimate column stats for "cast(col, XXXType)"

-----cast-est------
query4 41169 40335 40267 40267
query58 463 361 401 361
Total cold run time: 41632 ms
Total hot run time: 40628 ms

----master------
query4 40624 40180 40299 40180
query58 487 389 420 389
Total cold run time: 41111 ms
Total hot run time: 40569 ms
  • Loading branch information
englefly authored Jan 12, 2024
1 parent 5f86b7b commit 1bd1530
Show file tree
Hide file tree
Showing 21 changed files with 316 additions and 129 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
import java.nio.ByteOrder;
import java.util.Objects;

public class DecimalLiteral extends LiteralExpr {
public class DecimalLiteral extends NumericLiteralExpr {
private static final Logger LOG = LogManager.getLogger(DecimalLiteral.class);
private BigDecimal value;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
import java.nio.ByteOrder;
import java.text.NumberFormat;

public class FloatLiteral extends LiteralExpr {
public class FloatLiteral extends NumericLiteralExpr {
private double value;

public FloatLiteral() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

public class IntLiteral extends LiteralExpr {
public class IntLiteral extends NumericLiteralExpr {
private static final Logger LOG = LogManager.getLogger(IntLiteral.class);

public static final long TINY_INT_MIN = Byte.MIN_VALUE; // -2^7 ~ 2^7 - 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
import java.util.Objects;

// large int literal for numbers that native types cannot represent
public class LargeIntLiteral extends LiteralExpr {
public class LargeIntLiteral extends NumericLiteralExpr {
private static final Logger LOG = LogManager.getLogger(LargeIntLiteral.class);

// -2^127
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/apache/impala/blob/branch-2.9.0/fe/src/main/java/org/apache/impala/LiteralExpr.java
// and modified by Doris

package org.apache.doris.analysis;

/**
 * Common abstract parent for literal expressions that carry a numeric value.
 *
 * <p>The class adds no state or behavior of its own; it only gives numeric literals a shared
 * type that callers can test for with {@code instanceof}.
 */
public abstract class NumericLiteralExpr extends LiteralExpr {
    /**
     * Creates a numeric literal; the no-argument {@link LiteralExpr} constructor is invoked implicitly.
     */
    public NumericLiteralExpr() {
    }

    /**
     * Copy constructor; copying is delegated to the {@link LiteralExpr} copy constructor.
     *
     * @param other the literal to copy
     */
    public NumericLiteralExpr(NumericLiteralExpr other) {
        super(other);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.doris.nereids.stats;

import org.apache.doris.analysis.ArithmeticExpr.Operator;
import org.apache.doris.analysis.NumericLiteralExpr;
import org.apache.doris.analysis.StringLiteral;
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.trees.expressions.Add;
Expand Down Expand Up @@ -169,35 +170,50 @@ public ColumnStatistic visitCast(Cast cast, Statistics context) {
}

private ColumnStatistic castMinMax(ColumnStatistic colStats, DataType targetType) {
if (colStats.minExpr instanceof StringLiteral || colStats.maxExpr instanceof StringLiteral) {
if (targetType.isDateLikeType()) {
ColumnStatisticBuilder builder = new ColumnStatisticBuilder(colStats);
if (colStats.minExpr != null) {
try {
String strMin = colStats.minExpr.getStringValue();
DateLiteral dateMinLiteral = new DateLiteral(strMin);
long min = dateMinLiteral.getValue();
builder.setMinValue(min);
builder.setMinExpr(dateMinLiteral.toLegacyLiteral());
} catch (AnalysisException e) {
// ignore exception. do not convert min
}
// cast str to date/datetime
if (colStats.minExpr instanceof StringLiteral
&& colStats.maxExpr instanceof StringLiteral
&& targetType.isDateLikeType()) {
boolean convertSuccess = true;
ColumnStatisticBuilder builder = new ColumnStatisticBuilder(colStats);
if (colStats.minExpr != null) {
try {
String strMin = colStats.minExpr.getStringValue();
DateLiteral dateMinLiteral = new DateLiteral(strMin);
long min = dateMinLiteral.getValue();
builder.setMinValue(min);
builder.setMinExpr(dateMinLiteral.toLegacyLiteral());
} catch (AnalysisException e) {
convertSuccess = false;
}
if (colStats.maxExpr != null) {
try {
String strMax = colStats.maxExpr.getStringValue();
DateLiteral dateMaxLiteral = new DateLiteral(strMax);
long max = dateMaxLiteral.getValue();
builder.setMaxValue(max);
builder.setMaxExpr(dateMaxLiteral.toLegacyLiteral());
} catch (AnalysisException e) {
// ignore exception. do not convert max
}
}
if (convertSuccess && colStats.maxExpr != null) {
try {
String strMax = colStats.maxExpr.getStringValue();
DateLiteral dateMaxLiteral = new DateLiteral(strMax);
long max = dateMaxLiteral.getValue();
builder.setMaxValue(max);
builder.setMaxExpr(dateMaxLiteral.toLegacyLiteral());
} catch (AnalysisException e) {
convertSuccess = false;
}
}
if (convertSuccess) {
return builder.build();
}
}
return colStats;
// cast numeric to numeric
if (colStats.minExpr instanceof NumericLiteralExpr && colStats.maxExpr instanceof NumericLiteralExpr) {
if (targetType.isNumericType()) {
return colStats;
}
}

// cast other date types, set min/max infinity
ColumnStatisticBuilder builder = new ColumnStatisticBuilder(colStats);
builder.setMinExpr(null).setMinValue(Double.NEGATIVE_INFINITY)
.setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY);
return builder.build();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
/**
* decimal type literal
*/
public class DecimalLiteral extends Literal {
public class DecimalLiteral extends FractionalLiteral {

private final BigDecimal value;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
/**
* Literal for DecimalV3 Type
*/
public class DecimalV3Literal extends Literal {
public class DecimalV3Literal extends FractionalLiteral {

private final BigDecimal value;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
/**
* Double literal
*/
public class DoubleLiteral extends Literal {
public class DoubleLiteral extends FractionalLiteral {

private final double value;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
/**
* float type literal
*/
public class FloatLiteral extends Literal {
public class FloatLiteral extends FractionalLiteral {

private final float value;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.nereids.trees.expressions.literal;

import org.apache.doris.nereids.types.DataType;

/**
 * Abstract base class for literals of fractional numeric types: float, double and decimal.
 * It adds no members beyond the constructor and only groups these literals under
 * {@link NumericLiteral}.
 */
public abstract class FractionalLiteral extends NumericLiteral {
/**
* Constructor for FractionalLiteral.
*
* @param dataType logical data type in Nereids; passed unchanged to the NumericLiteral constructor
*/
public FractionalLiteral(DataType dataType) {
super(dataType);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import org.apache.doris.nereids.types.DataType;

/** IntegralLiteral */
public abstract class IntegerLikeLiteral extends Literal {
public abstract class IntegerLikeLiteral extends NumericLiteral {
/**
* Constructor for Literal.
*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.nereids.trees.expressions.literal;

import org.apache.doris.nereids.types.DataType;

/**
 * Abstract base class for numeric literals.
 * It adds no members beyond the constructor and only gives numeric literals a common
 * parent type below {@link Literal}.
 */
public abstract class NumericLiteral extends Literal {
/**
* Constructor for NumericLiteral.
*
* @param dataType logical data type in Nereids; passed unchanged to the Literal constructor
*/
public NumericLiteral(DataType dataType) {
super(dataType);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,21 @@

package org.apache.doris.nereids.stats;

import org.apache.doris.analysis.DateLiteral;
import org.apache.doris.analysis.StringLiteral;
import org.apache.doris.nereids.trees.expressions.Add;
import org.apache.doris.nereids.trees.expressions.Cast;
import org.apache.doris.nereids.trees.expressions.Divide;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.Multiply;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.expressions.Subtract;
import org.apache.doris.nereids.trees.expressions.functions.agg.Max;
import org.apache.doris.nereids.trees.expressions.functions.agg.Min;
import org.apache.doris.nereids.types.DateType;
import org.apache.doris.nereids.types.DoubleType;
import org.apache.doris.nereids.types.IntegerType;
import org.apache.doris.nereids.types.StringType;
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.ColumnStatisticBuilder;
import org.apache.doris.statistics.Statistics;
Expand Down Expand Up @@ -250,4 +256,69 @@ public void test8() {
Assertions.assertTrue(Precision.equals(0.1, estimated.minValue, 0.001));
Assertions.assertEquals(2, estimated.maxValue);
}

// cast(str to double) = double
// string min/max literals cannot be reused for a double column,
// so the estimate is expected to have an infinite range and no min/max literal
@Test
public void testCastStrToDouble() {
    SlotReference strSlot = new SlotReference("a", StringType.INSTANCE);
    ColumnStatistic strColStats = new ColumnStatisticBuilder()
            .setNdv(100)
            .setMinExpr(new StringLiteral("01"))
            .setMinValue(13333333)
            .setMaxExpr(new StringLiteral("A9"))
            .setMaxValue(23333333)
            .build();
    Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
    slotToColumnStat.put(strSlot, strColStats);
    Statistics inputStats = new Statistics(1000, slotToColumnStat);

    Cast castToDouble = new Cast(strSlot, DoubleType.INSTANCE);
    ColumnStatistic est = ExpressionEstimation.estimate(castToDouble, inputStats);

    Assertions.assertTrue(Double.isInfinite(est.minValue));
    Assertions.assertTrue(Double.isInfinite(est.maxValue));
    Assertions.assertNull(est.minExpr);
    Assertions.assertNull(est.maxExpr);
}

// cast(str to date) = date
// both min and max can be converted to date, so the estimate is expected to keep
// the range and carry date literals instead of string literals
@Test
public void testCastStrToDateSuccess() {
    SlotReference a = new SlotReference("a", StringType.INSTANCE);
    Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
    ColumnStatisticBuilder builder = new ColumnStatisticBuilder()
            .setNdv(100)
            .setMinExpr(new StringLiteral("2020-01-01"))
            .setMinValue(20200101000000.0)
            .setMaxExpr(new StringLiteral("2021-01-01"))
            .setMaxValue(20210101000000.0);
    slotToColumnStat.put(a, builder.build());
    Statistics stats = new Statistics(1000, slotToColumnStat);
    Cast cast = new Cast(a, DateType.INSTANCE);
    ColumnStatistic est = ExpressionEstimation.estimate(cast, stats);
    Assertions.assertTrue(est.minExpr instanceof DateLiteral);
    Assertions.assertTrue(est.maxExpr instanceof DateLiteral);
    // assertEquals takes (expected, actual); the swapped order produced misleading failure messages
    Assertions.assertEquals(20200101000000.0, est.minValue);
    Assertions.assertEquals(20210101000000.0, est.maxValue);
}

// cast(str to date) = date
// min or max cannot be converted to date (here: max is not a valid date string),
// so the estimate is expected to have an infinite range and no min/max literal
@Test
public void testCastStrToDateFail() {
    SlotReference strSlot = new SlotReference("a", StringType.INSTANCE);
    ColumnStatistic strColStats = new ColumnStatisticBuilder()
            .setNdv(100)
            .setMinExpr(new StringLiteral("2020-01-01"))
            .setMinValue(20200101000000.0)
            .setMaxExpr(new StringLiteral("2021abcdefg"))
            .setMaxValue(20210101000000.0)
            .build();
    Map<Expression, ColumnStatistic> slotToColumnStat = new HashMap<>();
    slotToColumnStat.put(strSlot, strColStats);
    Statistics inputStats = new Statistics(1000, slotToColumnStat);

    Cast castToDate = new Cast(strSlot, DateType.INSTANCE);
    ColumnStatistic est = ExpressionEstimation.estimate(castToDate, inputStats);

    Assertions.assertTrue(Double.isInfinite(est.minValue));
    Assertions.assertTrue(Double.isInfinite(est.maxValue));
    Assertions.assertNull(est.minExpr);
    Assertions.assertNull(est.maxExpr);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -835,7 +835,9 @@ public void testBetweenCastFilter() {
.setNdv(100)
.setAvgSizeByte(4)
.setNumNulls(0)
.setMaxExpr(new IntLiteral(100))
.setMaxValue(100)
.setMinExpr(new IntLiteral(0))
.setMinValue(0)
.setCount(100);
DoubleLiteral begin = new DoubleLiteral(40.0);
Expand All @@ -847,7 +849,7 @@ public void testBetweenCastFilter() {
stats.addColumnStats(a, builder.build());
FilterEstimation filterEstimation = new FilterEstimation();
Statistics result = filterEstimation.estimate(and, stats);
Assertions.assertEquals(result.getRowCount(), 10, 0.01);
Assertions.assertEquals(10, result.getRowCount(), 0.01);
ColumnStatistic colStats = result.findColumnStatistics(a);
Assertions.assertTrue(colStats != null);
Assertions.assertEquals(10, colStats.ndv, 0.1);
Expand Down
Loading

0 comments on commit 1bd1530

Please sign in to comment.