Skip to content
This repository was archived by the owner on Aug 2, 2022. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
d568e9a
Change grammar and AST builder
dai-chen Aug 21, 2020
e7ef9ea
Change analyzer
dai-chen Aug 22, 2020
4947279
Expression name issue
dai-chen Aug 24, 2020
1f0e218
Merge branch 'develop' into support-group-by-in-new-sql-parser
dai-chen Aug 24, 2020
df058a6
Add context in sql ast builder
dai-chen Aug 25, 2020
401e0c0
Add javadoc, deduplicate and dedicate aggregation builder
dai-chen Aug 25, 2020
5b25c8a
Add UT for aggregation builder
dai-chen Aug 25, 2020
579ddf3
Add validation in aggregation builder
dai-chen Aug 25, 2020
89d3363
Fix function arguments are not on child list issue
dai-chen Aug 25, 2020
c17344a
Add validation for non aggregated column if no group by
dai-chen Aug 25, 2020
d1f7b58
Fix jacoco
dai-chen Aug 25, 2020
e6d249a
Refactor and add UT
dai-chen Aug 25, 2020
b979535
Add comparison test
dai-chen Aug 26, 2020
eeed0a2
Fix broken UT
dai-chen Aug 26, 2020
66379cf
Fix UT
dai-chen Aug 26, 2020
e381f97
Add IT for validation
dai-chen Aug 26, 2020
03c423a
Fix broken IT
dai-chen Aug 26, 2020
9787b1d
Add more comparison tests
dai-chen Aug 26, 2020
78cfb4e
Add more comparison tests
dai-chen Aug 27, 2020
67d2edc
Add comparison test for bug fix 280
dai-chen Aug 27, 2020
244478d
Remove COUNT and change grammar for SUM/AVG for now
dai-chen Aug 27, 2020
816d687
Add comparison test for bug fix 288
dai-chen Aug 28, 2020
7661cf0
Support group by alias
dai-chen Aug 28, 2020
3dc0ed6
Support group by ordinal and add comparison test
dai-chen Aug 28, 2020
475b741
Add UT for edge case of ordinal
dai-chen Aug 28, 2020
645c254
Pass jacoco
dai-chen Aug 28, 2020
fbdab96
Skip problematic IT for now
dai-chen Aug 28, 2020
7caa9e7
Add more comparison test for index alias
dai-chen Aug 28, 2020
b01677f
Prepare PR
dai-chen Sep 2, 2020
86703a1
Add and reformat javadoc
dai-chen Sep 3, 2020
431b701
Refactor query spec by separating visitor
dai-chen Sep 3, 2020
517418c
Prepare PR
dai-chen Sep 3, 2020
fb056a8
Merge branch 'develop' into support-group-by-in-new-sql-parser
dai-chen Sep 3, 2020
0c0c5f8
Bug fix 373
dai-chen Sep 3, 2020
4970518
Bug fix 717
dai-chen Sep 3, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,17 @@
package com.amazon.opendistroforelasticsearch.sql.analysis;

import com.amazon.opendistroforelasticsearch.sql.analysis.symbol.Namespace;
import com.amazon.opendistroforelasticsearch.sql.analysis.symbol.Symbol;
import com.amazon.opendistroforelasticsearch.sql.ast.AbstractNodeVisitor;
import com.amazon.opendistroforelasticsearch.sql.ast.expression.Alias;
import com.amazon.opendistroforelasticsearch.sql.ast.expression.AllFields;
import com.amazon.opendistroforelasticsearch.sql.ast.expression.Field;
import com.amazon.opendistroforelasticsearch.sql.ast.expression.QualifiedName;
import com.amazon.opendistroforelasticsearch.sql.ast.expression.UnresolvedExpression;
import com.amazon.opendistroforelasticsearch.sql.data.type.ExprType;
import com.amazon.opendistroforelasticsearch.sql.exception.SemanticCheckException;
import com.amazon.opendistroforelasticsearch.sql.expression.DSL;
import com.amazon.opendistroforelasticsearch.sql.expression.Expression;
import com.amazon.opendistroforelasticsearch.sql.expression.NamedExpression;
import com.amazon.opendistroforelasticsearch.sql.expression.ReferenceExpression;
import com.google.common.collect.ImmutableList;
Expand Down Expand Up @@ -64,12 +67,26 @@ public List<NamedExpression> visitField(Field node, AnalysisContext context) {

@Override
public List<NamedExpression> visitAlias(Alias node, AnalysisContext context) {
Expression expr = referenceIfSymbolDefined(node.getDelegated(), context);
return Collections.singletonList(DSL.named(
unqualifiedNameIfFieldOnly(node, context),
node.getDelegated().accept(expressionAnalyzer, context),
expr,
node.getAlias()));
}

private Expression referenceIfSymbolDefined(UnresolvedExpression expr,
AnalysisContext context) {
try {
// Since resolved aggregator.toString() is used as symbol name, unresolved expression
// needs to be analyzed too to get toString() name for consistency
String symbolName = expressionAnalyzer.analyze(expr, context).toString();
ExprType type = context.peek().resolve(new Symbol(Namespace.FIELD_NAME, symbolName));
return DSL.ref(symbolName, type);
} catch (SemanticCheckException e) {
return expr.accept(expressionAnalyzer, context);
}
}

@Override
public List<NamedExpression> visitAllFields(AllFields node,
AnalysisContext context) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
package com.amazon.opendistroforelasticsearch.sql.ast.expression;

import com.amazon.opendistroforelasticsearch.sql.ast.AbstractNodeVisitor;
import com.google.common.collect.ImmutableList;
import java.util.Collections;
import java.util.List;
import lombok.EqualsAndHashCode;
import lombok.Getter;
Expand All @@ -37,7 +37,7 @@ public class Function extends UnresolvedExpression {

@Override
public List<UnresolvedExpression> getChild() {
return ImmutableList.of();
return Collections.unmodifiableList(funcArgs);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package com.amazon.opendistroforelasticsearch.sql.analysis;

import static com.amazon.opendistroforelasticsearch.sql.data.type.ExprCoreType.FLOAT;
import static com.amazon.opendistroforelasticsearch.sql.data.type.ExprCoreType.INTEGER;
import static com.amazon.opendistroforelasticsearch.sql.data.type.ExprCoreType.STRUCT;
import static org.junit.jupiter.api.Assertions.assertEquals;
Expand Down Expand Up @@ -57,6 +58,18 @@ public void named_expression_with_alias() {
);
}

@Test
public void named_expression_with_delegated_expression_defined_in_symbol_table() {
analysisContext.push();
analysisContext.peek().define(new Symbol(Namespace.FIELD_NAME, "avg(integer_value)"), FLOAT);

assertAnalyzeEqual(
DSL.named("AVG(integer_value)", DSL.ref("avg(integer_value)", FLOAT)),
AstDSL.alias("AVG(integer_value)",
AstDSL.aggregate("AVG", AstDSL.qualifiedName("integer_value")))
);
}

@Test
public void field_name_with_qualifier() {
analysisContext.peek().define(new Symbol(Namespace.INDEX_NAME, "index_alias"), STRUCT);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1292,6 +1292,8 @@ public void distinctWithOneField() {
);
}

@Ignore("Skip this because it compares result of GROUP BY and DISTINCT and find difference in\n"
+ "schema type (string and text). Remove this when DISTINCT supported in new engine.")
@Test
public void distinctWithMultipleFields() {
Assert.assertEquals(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,9 @@ public void groupBySingleField() throws IOException {
assertContainsData(getDataRows(response), fields);
}

@Ignore("The semantic of this and previous are wrong. The correct semantic is that * will "
+ "be expanded to all fields of the index. Error should be raise for both due to difference "
+ "between columns in SELECT and GROUP BY.")
@Test
public void groupByMultipleFields() throws IOException {
JSONObject response = executeQuery(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ public void selectAllWithFieldAndGroupByReverseOrder() throws IOException {
checkSelectAllAndFieldAggregationResponseSize(response, "age");
}

@Ignore("This failed because there is no alias field in schema of new engine default formatter")
@Test
public void selectFieldWithAliasAndGroupBy() {
String response =
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/*
* Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*
*/

package com.amazon.opendistroforelasticsearch.sql.sql;

import static com.amazon.opendistroforelasticsearch.sql.legacy.plugin.RestSqlAction.QUERY_API_ENDPOINT;
import static com.amazon.opendistroforelasticsearch.sql.util.MatcherUtils.featureValueOf;
import static org.elasticsearch.rest.RestStatus.BAD_REQUEST;
import static org.hamcrest.Matchers.is;

import com.amazon.opendistroforelasticsearch.sql.legacy.SQLIntegTestCase;
import java.io.IOException;
import java.util.Locale;
import java.util.function.Function;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.ResponseException;
import org.elasticsearch.rest.RestStatus;
import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;

/**
* The query validation IT only covers test for error cases that not doable in comparison test.
* For all other tests, comparison test should be favored over manual written test like this.
*/
public class QueryValidationIT extends SQLIntegTestCase {

@Rule
public ExpectedException exceptionRule = ExpectedException.none();

@Override
protected void init() throws Exception {
loadIndex(Index.ACCOUNT);
}

@Ignore("Will add this validation in analyzer later")
@Test
public void testNonAggregatedSelectColumnMissingInGroupByClause() throws IOException {
expectResponseException()
.hasStatusCode(BAD_REQUEST)
.hasErrorType("SemanticCheckException")
.containsMessage("Expression [state] that contains non-aggregated column "
+ "is not present in group by clause")
.whenExecute("SELECT state FROM elasticsearch-sql_test_index_account GROUP BY age");
}

@Test
public void testNonAggregatedSelectColumnPresentWithoutGroupByClause() throws IOException {
expectResponseException()
.hasStatusCode(BAD_REQUEST)
.hasErrorType("SemanticCheckException")
.containsMessage("Explicit GROUP BY clause is required because expression [state] "
+ "contains non-aggregated column")
.whenExecute("SELECT state, AVG(age) FROM elasticsearch-sql_test_index_account");
}

public ResponseExceptionAssertion expectResponseException() {
return new ResponseExceptionAssertion(exceptionRule);
}

/**
* Response exception assertion helper to assert property value in ES ResponseException
* and Response inside. This serves as syntax sugar to improve the readability of test
* code.
*/
private static class ResponseExceptionAssertion {
private final ExpectedException exceptionRule;

private ResponseExceptionAssertion(ExpectedException exceptionRule) {
this.exceptionRule = exceptionRule;

exceptionRule.expect(ResponseException.class);
}

ResponseExceptionAssertion hasStatusCode(RestStatus code) {
exceptionRule.expect(featureValueOf("statusCode", is(code),
(Function<ResponseException, RestStatus>) e ->
RestStatus.fromCode(e.getResponse().getStatusLine().getStatusCode())));
return this;
}

ResponseExceptionAssertion hasErrorType(String type) {
exceptionRule.expectMessage("\"type\": \"" + type + "\"");
return this;
}

ResponseExceptionAssertion containsMessage(String... messages) {
for (String message : messages) {
exceptionRule.expectMessage(message);
}
return this;
}

void whenExecute(String query) throws IOException {
execute(query);
}
}

private static void execute(String query) throws IOException {
Request request = new Request("POST", QUERY_API_ENDPOINT);
request.setJsonEntity(String.format(Locale.ROOT, "{\n" + " \"query\": \"%s\"\n" + "}", query));

RequestOptions.Builder restOptionsBuilder = RequestOptions.DEFAULT.toBuilder();
restOptionsBuilder.addHeader("Content-Type", "application/json");
request.setOptions(restOptionsBuilder);

client().performRequest(request);
}

}
2 changes: 2 additions & 0 deletions integ-test/src/test/resources/correctness/bugfixes/280.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
SELECT SUM(AvgTicketPrice - 10) FROM kibana_sample_data_flights
SELECT SUM(ABS(AvgTicketPrice * -2)) FROM kibana_sample_data_flights
2 changes: 2 additions & 0 deletions integ-test/src/test/resources/correctness/bugfixes/288.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
SELECT avg(log(AvgTicketPrice)) FROM kibana_sample_data_flights GROUP BY DestCountry
SELECT avg(log(AvgTicketPrice)) FROM kibana_sample_data_flights GROUP BY DestCountry, dayOfWeek
2 changes: 2 additions & 0 deletions integ-test/src/test/resources/correctness/bugfixes/373.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
SELECT Abs(dayOfWeek) FROM kibana_sample_data_flights GROUP BY abS(dayOfWeek)
SELECT abs(dayOfWeek) FROM kibana_sample_data_flights GROUP BY ABS(dayOfWeek)
2 changes: 2 additions & 0 deletions integ-test/src/test/resources/correctness/bugfixes/430.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
SELECT Origin, SUM(AvgTicketPrice) FROM kibana_sample_data_flights GROUP BY Origin
SELECT Dest, AVG(FlightTimeMin) FROM kibana_sample_data_flights GROUP BY Dest
2 changes: 2 additions & 0 deletions integ-test/src/test/resources/correctness/bugfixes/717.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
SELECT FlightDelay FROM kibana_sample_data_flights GROUP BY FlightDelay
SELECT FlightDelay, Cancelled FROM kibana_sample_data_flights GROUP BY FlightDelay, Cancelled
37 changes: 37 additions & 0 deletions integ-test/src/test/resources/correctness/queries/groupby.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
SELECT COUNT(*) FROM kibana_sample_data_flights
SELECT SUM(FlightDelayMin) FROM kibana_sample_data_flights
SELECT AVG(FlightDelayMin) FROM kibana_sample_data_flights
SELECT count(*), Avg(FlightDelayMin), sUm(FlightDelayMin) FROM kibana_sample_data_flights
SELECT COUNT(*) AS cnt, AVG(FlightDelayMin) AS a, SUM(FlightDelayMin) AS s FROM kibana_sample_data_flights
SELECT COUNT(*) FROM kibana_sample_data_flights GROUP BY OriginCountry
SELECT COUNT(FlightNum) FROM kibana_sample_data_flights GROUP BY OriginCountry
SELECT COUNT(FlightDelay) FROM kibana_sample_data_flights GROUP BY OriginCountry
SELECT COUNT(FlightDelayMin) FROM kibana_sample_data_flights GROUP BY OriginCountry
SELECT AVG(FlightDelayMin) FROM kibana_sample_data_flights GROUP BY OriginCountry
SELECT SUM(FlightDelayMin) FROM kibana_sample_data_flights GROUP BY OriginCountry
SELECT OriginCountry, COUNT(*) FROM kibana_sample_data_flights GROUP BY OriginCountry
SELECT OriginCountry, AVG(FlightDelayMin) FROM kibana_sample_data_flights GROUP BY OriginCountry
SELECT OriginCountry, SUM(FlightDelayMin) FROM kibana_sample_data_flights GROUP BY OriginCountry
SELECT OriginCountry, COUNT(*), AVG(FlightDelayMin), SUM(FlightDelayMin) FROM kibana_sample_data_flights GROUP BY OriginCountry
SELECT COUNT(*), AVG(FlightDelayMin), SUM(FlightDelayMin) FROM kibana_sample_data_flights GROUP BY OriginCountry, OriginCityName
SELECT OriginCountry, COUNT(*), AVG(FlightDelayMin), SUM(FlightDelayMin) FROM kibana_sample_data_flights GROUP BY OriginCountry, OriginCityName
SELECT OriginCountry, OriginCityName, COUNT(*) FROM kibana_sample_data_flights GROUP BY OriginCountry, OriginCityName
SELECT OriginCountry, OriginCityName, AVG(FlightDelayMin) FROM kibana_sample_data_flights GROUP BY OriginCountry, OriginCityName
SELECT OriginCountry, OriginCityName, SUM(FlightDelayMin) FROM kibana_sample_data_flights GROUP BY OriginCountry, OriginCityName
SELECT AvgTicketPrice, COUNT(*), AVG(AvgTicketPrice), SUM(AvgTicketPrice) FROM kibana_sample_data_flights GROUP BY AvgTicketPrice
SELECT FlightDelayMin, COUNT(*), AVG(FlightDelayMin), SUM(FlightDelayMin) FROM kibana_sample_data_flights GROUP BY FlightDelayMin
SELECT OriginCountry, OriginCityName, AVG(FlightDelayMin) FROM kibana_sample_data_flights GROUP BY 1, 2
SELECT OriginCountry, OriginCityName, AVG(FlightDelayMin) FROM kibana_sample_data_flights GROUP BY OriginCountry, 2
SELECT OriginCountry, OriginCityName, AVG(FlightDelayMin) FROM kibana_sample_data_flights GROUP BY 1, OriginCityName
SELECT ABS(FlightDelayMin) FROM kibana_sample_data_flights GROUP BY 1
SELECT OriginCountry AS country, AVG(FlightDelayMin) FROM kibana_sample_data_flights GROUP BY country
SELECT OriginCountry AS country, OriginCityName, AVG(FlightDelayMin) FROM kibana_sample_data_flights GROUP BY country, 2
SELECT OriginCountry AS country, OriginCityName AS city, AVG(FlightDelayMin) FROM kibana_sample_data_flights GROUP BY country, city
SELECT ABS(FlightDelayMin) AS delay FROM kibana_sample_data_flights GROUP BY delay
SELECT flights.OriginCountry, AVG(FlightDelayMin) FROM kibana_sample_data_flights AS flights GROUP BY flights.OriginCountry
SELECT flights.OriginCountry, flights.OriginCityName, SUM(FlightDelayMin) FROM kibana_sample_data_flights AS flights GROUP BY flights.OriginCountry, flights.OriginCityName
SELECT flights.OriginCountry, flights.OriginCityName, SUM(FlightDelayMin) FROM kibana_sample_data_flights AS flights GROUP BY 1, 2
SELECT flights.OriginCountry AS country, flights.OriginCityName AS city, SUM(FlightDelayMin) FROM kibana_sample_data_flights AS flights GROUP BY country, city
SELECT ABS(flights.FlightDelayMin) AS delay FROM kibana_sample_data_flights AS flights GROUP BY delay
SELECT `flights`.OriginCountry, AVG(FlightDelayMin) FROM kibana_sample_data_flights AS `flights` GROUP BY `flights`.OriginCountry
SELECT `flights`.OriginCountry, AVG(FlightDelayMin) FROM kibana_sample_data_flights AS `flights` GROUP BY 1
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
import static org.elasticsearch.rest.RestStatus.SERVICE_UNAVAILABLE;

import com.alibaba.druid.sql.parser.ParserException;
import com.amazon.opendistroforelasticsearch.sql.common.antlr.SyntaxCheckException;
import com.amazon.opendistroforelasticsearch.sql.exception.SemanticCheckException;
import com.amazon.opendistroforelasticsearch.sql.legacy.antlr.OpenDistroSqlAnalyzer;
import com.amazon.opendistroforelasticsearch.sql.legacy.antlr.SqlAnalysisConfig;
import com.amazon.opendistroforelasticsearch.sql.legacy.antlr.SqlAnalysisException;
Expand Down Expand Up @@ -247,7 +249,9 @@ private static boolean isClientError(Exception e) {
|| e instanceof IllegalArgumentException
|| e instanceof IndexNotFoundException
|| e instanceof VerificationException
|| e instanceof SqlAnalysisException;
|| e instanceof SqlAnalysisException
|| e instanceof SyntaxCheckException
|| e instanceof SemanticCheckException;
}

private void sendResponse(final RestChannel channel, final String message, final RestStatus status) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ public void skipExplainThatNotSupport() {
@Test
public void skipQueryThatNotSupport() {
SQLQueryRequest request = new SQLQueryRequest(
new JSONObject("{\"query\": \"SELECT * FROM test WHERE age = 10 GROUP BY age\"}"),
"SELECT * FROM test WHERE age = 10 GROUP BY age",
new JSONObject("{\"query\": \"SELECT * FROM test WHERE age = 10 GROUP BY age LIMIT 10\"}"),
"SELECT * FROM test WHERE age = 10 GROUP BY age LIMIT 10",
QUERY_API_ENDPOINT,
"");

Expand Down
21 changes: 20 additions & 1 deletion sql/src/main/antlr/OpenDistroSQLParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,25 @@ selectElement
fromClause
: FROM tableName (AS? alias)?
(whereClause)?
(groupByClause)?
;

whereClause
: WHERE expression
;

groupByClause
: GROUP BY groupByElements
;

groupByElements
: groupByElement (COMMA groupByElement)*
;

groupByElement
: expression
;

// Literals

constant
Expand Down Expand Up @@ -193,13 +206,19 @@ nullNotnull

functionCall
: scalarFunctionName LR_BRACKET functionArgs? RR_BRACKET #scalarFunctionCall
| aggregateFunction #aggregateFunctionCall
;

scalarFunctionName
: mathematicalFunctionName
| dateTimeFunctionName
;

aggregateFunction
: functionName=(AVG | SUM) LR_BRACKET functionArg RR_BRACKET
/*| COUNT LR_BRACKET (STAR | functionArg) RR_BRACKET */
;

mathematicalFunctionName
: ABS | CEIL | CEILING | CONV | CRC32 | E | EXP | FLOOR | LN | LOG | LOG10 | LOG2 | MOD | PI | POW | POWER
| RAND | ROUND | SIGN | SQRT | TRUNCATE
Expand All @@ -215,7 +234,7 @@ dateTimeFunctionName
;

functionArgs
: (functionArg (COMMA functionArg)*)?
: functionArg (COMMA functionArg)*
;

functionArg
Expand Down
Loading