Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@
*/
package org.apache.pinot.query.type;

import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rel.type.RelDataTypeSystemImpl;
import org.apache.calcite.sql.type.SqlTypeName;


/**
Expand Down Expand Up @@ -50,4 +53,11 @@ public int getMaxNumericScale() {
public int getMaxNumericPrecision() {
return MAX_DECIMAL_PRECISION_DIGIT;
}

@Override
public RelDataType deriveAvgAggType(RelDataTypeFactory typeFactory,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great catch! You will need to make some changes to the WindowFunctionsPlans.json to pick up the casting in the plans (I've used AVG quite extensively in that file)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Resolved all test issues.

RelDataType argumentType) {
return typeFactory.createTypeWithNullability(
typeFactory.createSqlType(SqlTypeName.DOUBLE), false);
Comment on lines +60 to +61
Copy link
Contributor

@walterddr walterddr Feb 22, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not the right way to fix the issue. we should rely on Calcite's derived type system to figure out what the return type should be. this way all AVG results are returning as double which is going to cause more issue than it resolves.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

based on a coarse read, we should check the input decimal precision and try to do upcast to the appropriate floating point type. (e.g. the scale and precision

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the review! I resorted to using double datatype for AVG because that's what we do in V1 engine.

We could maybe do what postgres does. Thoughts?

numeric for any integer type argument, double precision for a floating-point argument, otherwise the same as the argument data type

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Based on offline discussion, created oss issue #10318 to determine the logic for AVG return type.

}
}
16 changes: 16 additions & 0 deletions pinot-query-planner/src/test/resources/queries/AggregatePlans.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,22 @@
{
"aggregates_planning_tests": {
"queries": [
{
"description": "Select aggregates with filters and select alias",
"sql": "EXPLAIN PLAN FOR SELECT AVG(a.col3) as avg, COUNT(*) as count FROM a WHERE a.col3 >= 0 AND a.col2 = 'pink floyd'",
"output": [
"Execution Plan",
"\nLogicalProject(avg=[/(CAST($0):DOUBLE, $1)], count=[$1])",
"\n LogicalProject($f0=[CASE(=($1, 0), null:INTEGER, $0)], $f1=[$1])",
"\n LogicalAggregate(group=[{}], agg#0=[$SUM0($0)], agg#1=[$SUM0($1)])",
"\n LogicalExchange(distribution=[hash])",
"\n LogicalAggregate(group=[{}], agg#0=[$SUM0($1)], agg#1=[COUNT()])",
"\n LogicalProject(col2=[$0], col3=[$1])",
"\n LogicalFilter(condition=[AND(>=($1, 0), =($0, 'pink floyd'))])",
"\n LogicalTableScan(table=[[a]])",
"\n"
]
},
{
"description": "Select aggregates",
"sql": "EXPLAIN PLAN FOR SELECT SUM(a.col3), COUNT(a.col1) FROM a",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
"Execution Plan",
"\nLogicalProject(col1=[$0], EXPR$1=[$1], EXPR$2=[$2])",
"\n LogicalFilter(condition=[AND(>($1, 10), >=($3, 0), <($4, 20), <=($2, 10), =($5, 5))])",
"\n LogicalProject(col1=[$0], EXPR$1=[$1], EXPR$2=[$2], $f3=[$3], $f4=[$4], $f5=[CAST(/($5, $1)):INTEGER NOT NULL])",
"\n LogicalProject(col1=[$0], EXPR$1=[$1], EXPR$2=[$2], $f3=[$3], $f4=[$4], $f5=[/(CAST($5):DOUBLE NOT NULL, $1)])",
"\n LogicalProject(col1=[$0], EXPR$1=[$1], EXPR$2=[$2], $f3=[$3], $f4=[$4], $f5=[$2])",
"\n LogicalAggregate(group=[{0}], EXPR$1=[$SUM0($1)], EXPR$2=[$SUM0($2)], agg#2=[MAX($3)], agg#3=[MIN($4)])",
"\n LogicalExchange(distribution=[hash[0]])",
Expand All @@ -81,7 +81,7 @@
"Execution Plan",
"\nLogicalProject(value1=[$0], count=[$1], SUM=[$2])",
"\n LogicalFilter(condition=[AND(>($1, 10), >=($3, 0), <($4, 20), <=($2, 10), =($5, 5))])",
"\n LogicalProject(col1=[$0], count=[$1], SUM=[$2], $f3=[$3], $f4=[$4], $f5=[CAST(/($5, $1)):INTEGER NOT NULL])",
"\n LogicalProject(col1=[$0], count=[$1], SUM=[$2], $f3=[$3], $f4=[$4], $f5=[/(CAST($5):DOUBLE NOT NULL, $1)])",
"\n LogicalProject(col1=[$0], count=[$1], SUM=[$2], $f3=[$3], $f4=[$4], $f5=[$2])",
"\n LogicalAggregate(group=[{0}], count=[$SUM0($1)], SUM=[$SUM0($2)], agg#2=[MAX($3)], agg#3=[MIN($4)])",
"\n LogicalExchange(distribution=[hash[0]])",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@
"sql": "EXPLAIN PLAN FOR SELECT a.col1, AVG(b.col3) FROM a JOIN b ON a.col1 = b.col2 WHERE a.col3 >= 0 AND a.col2 = 'a' AND b.col3 < 0 GROUP BY a.col1",
"output": [
"Execution Plan",
"\nLogicalProject(col1=[$0], EXPR$1=[CAST(/($1, $2)):INTEGER NOT NULL])",
"\nLogicalProject(col1=[$0], EXPR$1=[/(CAST($1):DOUBLE NOT NULL, $2)])",
"\n LogicalAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[$SUM0($2)])",
"\n LogicalExchange(distribution=[hash[0]])",
"\n LogicalAggregate(group=[{0}], agg#0=[$SUM0($2)], agg#1=[COUNT()])",
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,6 @@
}
},
"queries": [
{
Copy link
Contributor Author

@vvivekiyer vvivekiyer Feb 22, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Query on H2 database is returning an INT. Couldn't find a better workaround other than removing this test for H2. Would appreciate inputs here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we comment out this test for now instead of removing ?

Will SELECT AVG(CAST int_col AS DOUBLE) FROM FOO work ?

"psql": "4.2.7",
"description": "average int",
"sql": "SELECT avg(int_col) FROM {tbl}"
},
{
"psql": "4.2.7",
"description": "average double",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,14 @@
"description": "test agg function with STD SQL operator works properly",
"sql": "SELECT sUm(col3_r), AvG(col3_r), MAX(PLUS(Add(CAST(col3_l AS DOUBLE), CAST(col3_r AS DOUBLE)), CAST(10 AS DOUBLE))) FROM (SELECT {tbl1}.col1 AS col1, {tbl1}.col3 AS col3_l, {tbl2}.col3 AS col3_r FROM {tbl1} JOIN {tbl2} USING (col2))",
"outputs": [
[7, 3, 15.0]
[7, 3.5, 15.0]
]
},
{
"description": "test scalar function with STD SQL operator, and scalar function without STD SQL operator can be found properly",
"sql": "SELECT sUm(col3), AvG(col3), MAX(pluS(CAST(col3 AS DOUBLE), CAST(10 AS DOUBLE))) FROM {tbl1}",
"outputs": [
[3, 1, 12.0]
[3, 1.5, 12.0]
]
}
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,23 +43,23 @@
"description": "multi 'with' table",
"sql": "WITH agg1 AS ( SELECT strCol, sum(intCol) AS sumVal FROM {tbl1} GROUP BY strCol), agg2 AS (SELECT strCol1, avg(intCol) AS avgVal FROM {tbl2} GROUP BY strCol1) SELECT strCol, sumVal - avgVal FROM agg1, agg2 WHERE agg1.strCol = agg2.strCol1",
"outputs": [
["a", 3],
["b", -98460]
["a", 2.5],
["b", -98460.5]
]
},
{
"description": "nested 'with' on agg table: (with a as ( ... ), select ... ",
"sql": "WITH agg1 AS (SELECT strCol1, strCol2, sum(intCol) AS sumVal FROM {tbl2} GROUP BY strCol1, strCol2) SELECT strCol1, avg(sumVal) AS avgVal FROM agg1 GROUP BY strCol1",
"outputs": [
["a", 1],
["b", 98462]
["a", 1.5],
["b", 98462.5]
]
},
{
"description": "nested multi 'with' table: (with a as ( ... ), with b as ( ... from a ... ) select ... ",
"sql": "WITH agg1 AS (SELECT strCol1, strCol2, sum(intCol) AS sumVal FROM {tbl2} GROUP BY strCol1, strCol2), agg2 AS ( SELECT strCol1, avg(sumVal) AS avgVal FROM agg1 GROUP BY strCol1) SELECT strCol1, avgVal FROM agg2 WHERE avgVal < 100",
"outputs": [
["a", 1]
["a", 1.5]
]
},
{
Expand Down