-
Notifications
You must be signed in to change notification settings - Fork 1.5k
[7867] Handle Select * with Extra Columns #7959
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
12b644f
c93e7c2
bd9b0b6
b14c500
560560a
83766d7
e223b56
21de33f
6761eaa
fc51eee
0750616
341cc5d
0a1f0f6
b346f4d
981ca30
4159c57
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,276 @@ | ||
| /** | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
|
|
||
| package org.apache.pinot.broker.requesthandler; | ||
|
|
||
| import com.google.common.collect.ImmutableMap; | ||
| import java.util.ArrayList; | ||
| import java.util.HashMap; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
| import java.util.stream.Collectors; | ||
| import org.apache.pinot.common.request.Expression; | ||
| import org.apache.pinot.common.request.PinotQuery; | ||
| import org.apache.pinot.sql.parsers.CalciteSqlParser; | ||
| import org.testng.Assert; | ||
| import org.testng.annotations.Test; | ||
|
|
||
|
|
||
| public class SelectStarWithOtherColsRewriteTest { | ||
|
|
||
| private static final Map<String, String> COL_MAP; | ||
|
|
||
| static { | ||
| //build table schema | ||
| ImmutableMap.Builder<String, String> builder = ImmutableMap.builder(); | ||
| builder.put("playerID", "playerID"); | ||
| builder.put("homeRuns", "homeRuns"); | ||
| builder.put("playerStint", "playerStint"); | ||
| builder.put("groundedIntoDoublePlays", "groundedIntoDoublePlays"); | ||
| builder.put("G_old", "G_old"); | ||
| builder.put("$segmentName", "$segmentName"); | ||
| builder.put("$docId", "$docId"); | ||
| builder.put("$hostName", "$hostName"); | ||
| COL_MAP = builder.build(); | ||
| } | ||
|
|
||
| /** | ||
| * When the query contains only '*', it should be expanded into columns. | ||
| */ | ||
| @Test | ||
| public void testShouldExpandWhenOnlyStarIsSelected() { | ||
| String sql = "SELECT * FROM baseballStats"; | ||
| PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); | ||
| BaseBrokerRequestHandler.updateColumnNames("baseballStats", pinotQuery, false, COL_MAP); | ||
| List<Expression> newSelections = pinotQuery.getSelectList(); | ||
| Map<String, Integer> countMap = new HashMap<>(); | ||
| for (Expression selection : newSelections) { | ||
| String col = selection.getIdentifier().getName(); | ||
| countMap.put(col, countMap.getOrDefault(col, 0) + 1); | ||
| } | ||
| Assert.assertEquals(countMap.size(), 5, "More new selections than expected"); | ||
| Assert.assertTrue(countMap.keySet().containsAll(COL_MAP.keySet().stream().filter(a -> !a.startsWith("$")).collect( | ||
| Collectors.toList())), "New selections contain virtual columns"); | ||
| countMap.forEach( | ||
| (key, value) -> Assert.assertEquals((int) value, 1, key + " has more than one occurrences in new selection")); | ||
| } | ||
|
|
||
| /** | ||
| * Expansion should not contain any virtual columns | ||
| */ | ||
| @Test | ||
| public void testShouldNotReturnExtraDefaultColumns() { | ||
| String sql = "SELECT $docId,*,$segmentName FROM baseballStats"; | ||
| PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); | ||
| BaseBrokerRequestHandler.updateColumnNames("baseballStats", pinotQuery, false, COL_MAP); | ||
| List<Expression> newSelections = pinotQuery.getSelectList(); | ||
| int docIdCnt = 0; | ||
| int segmentNameCnt = 0; | ||
| for (Expression newSelection : newSelections) { | ||
| String colName = newSelection.getIdentifier().getName(); | ||
| switch (colName) { | ||
| case "$docId": | ||
| docIdCnt++; | ||
| break; | ||
| case "$segmentName": | ||
| segmentNameCnt++; | ||
| break; | ||
| case "$hostName": | ||
| throw new RuntimeException("Extra default column returned"); | ||
| default: | ||
| } | ||
| } | ||
| Assert.assertEquals(docIdCnt, 1); | ||
| Assert.assertEquals(segmentNameCnt, 1); | ||
| } | ||
|
|
||
| /** | ||
| * Columns should not be deduped | ||
| */ | ||
| @Test | ||
| public void testShouldNotDedupMultipleRequestedColumns() { | ||
| String sql = "SELECT playerID,*,G_old FROM baseballStats"; | ||
| PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); | ||
| BaseBrokerRequestHandler.updateColumnNames("baseballStats", pinotQuery, false, COL_MAP); | ||
| List<Expression> newSelections = pinotQuery.getSelectList(); | ||
| int playerIdCnt = 0; | ||
| int goldCount = 0; | ||
| for (Expression newSelection : newSelections) { | ||
| String colName = newSelection.getIdentifier().getName(); | ||
| switch (colName) { | ||
| case "playerID": | ||
| playerIdCnt++; | ||
| break; | ||
| case "G_old": | ||
| goldCount++; | ||
| break; | ||
| default: | ||
| } | ||
| } | ||
| Assert.assertEquals(playerIdCnt, 2, "playerID does not occur once"); | ||
| Assert.assertEquals(goldCount, 2, "G_old occurs does not occur once"); | ||
| } | ||
|
|
||
| /** | ||
| * Selections should be returned in the requested order | ||
| */ | ||
| @Test | ||
| public void testSelectionOrder() { | ||
| String sql = "SELECT playerID,*,G_old FROM baseballStats"; | ||
| PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); | ||
| BaseBrokerRequestHandler.updateColumnNames("baseballStats", pinotQuery, false, COL_MAP); | ||
| List<Expression> newSelections = pinotQuery.getSelectList(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not correct / compliant with Standard SQL as far as I can tell. The column list in the select expressions should be deduplicated. So, Even generally for non star queries, using the same column name twice in the SELECT clause should be deduplicated. However, the scenario of aliasing must be handled. For example, the query
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @siddharthteotia thanks for the review. Actually I wasn't aware that this is dictated by the SQL standard. Had asked both of these questions (deduplication and returning internal columns) in the original issue for clarification. Will address these.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is what I am seeing on mysql (no dedup when * is combined with column names in select list), but not sure how standards complaint mysql is. In either case we should check against at least two other databases before setting behavior here since it will be difficult to change once set.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Tried this with Postgres, same behaviour (column is returned twice). The ANSI SQL standard doesn't allow for extra columns to be added but DBs have their own extensions to it. I think we should go with how MySQL and Postgres are doing it, that is, returning the columns twice unless there are some implications.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In the above query My preference would be to deduplicate and not return the column twice as the output is cleaner that way. So, in the above query cases column should be returned exactly once. If the user wants to get the same column twice, they should use aliasing (like I mentioned earlier)
CustomerID should be returned twice -- once as C1 and next as CustomerID But if we want to stick to how MySQL or Postgres is doing and that is the general ANSI SQL / user expectation, then let's not deduplicate. I am ok with it. Just does not look clean Trying the query here seems to deduplicate -- https://www.w3schools.com/sql/trysql.asp?filename=trysql_select_all Not sure which database it is using underneath
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| Assert.assertEquals(newSelections.get(0).getIdentifier().getName(), "playerID"); | ||
| Assert.assertEquals(newSelections.get(newSelections.size() - 1).getIdentifier().getName(), "G_old"); | ||
| //the expanded list should be alphabetically sorted | ||
| List<Expression> expandedSelections = newSelections.subList(1, newSelections.size() - 1); | ||
| List<Expression> assumedUnsortedSelections = new ArrayList<>(expandedSelections); | ||
| //sort alphabetically | ||
| assumedUnsortedSelections.sort(null); | ||
| Assert.assertEquals(expandedSelections, assumedUnsortedSelections, "Expanded selections not sorted alphabetically"); | ||
| } | ||
|
|
||
| /** | ||
| * When the same column is requested twice, once with an alias and once as part of *, then it should be returned twice | ||
| */ | ||
| @Test | ||
| public void testAliasing() { | ||
| String sql = "SELECT playerID as pid,* FROM baseballStats"; | ||
| PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); | ||
| BaseBrokerRequestHandler.updateColumnNames("baseballStats", pinotQuery, false, COL_MAP); | ||
| List<Expression> newSelections = pinotQuery.getSelectList(); | ||
| Assert.assertTrue(newSelections.get(0).isSetFunctionCall()); | ||
| Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperator(), "AS"); | ||
| Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), | ||
| "playerID"); | ||
| Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperands().get(1).getIdentifier().getName(), "pid"); | ||
| boolean playerIdPresent = false; | ||
| for (int i = 1; i < newSelections.size(); i++) { | ||
| if (newSelections.get(i).getIdentifier().getName().equals("playerID")) { | ||
| playerIdPresent = true; | ||
| break; | ||
| } | ||
| } | ||
| Assert.assertTrue(playerIdPresent, "playerID col is missing"); | ||
| } | ||
|
|
||
| /** | ||
| * When a function is applied to a column, then that col is returned along with the original column | ||
| */ | ||
| @Test | ||
| public void testFuncOnColumns1() { | ||
| String sql = "SELECT sqrt(homeRuns),* FROM baseballStats"; | ||
| PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); | ||
| BaseBrokerRequestHandler.updateColumnNames("baseballStats", pinotQuery, false, COL_MAP); | ||
| List<Expression> newSelections = pinotQuery.getSelectList(); | ||
| Assert.assertTrue(newSelections.get(0).isSetFunctionCall()); | ||
| Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperator(), "SQRT"); | ||
| Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), | ||
| "homeRuns"); | ||
| //homeRuns is returned as well | ||
| int homeRunsCnt = 0; | ||
| for (Expression selection : newSelections) { | ||
| if (selection.isSetIdentifier() && selection.getIdentifier().getName().equals("homeRuns")) { | ||
| homeRunsCnt++; | ||
| } | ||
| } | ||
| Assert.assertEquals(homeRunsCnt, 1); | ||
| } | ||
|
|
||
| /** | ||
| * When a function is applied to a column, then that col is returned along with the original column | ||
| */ | ||
| @Test | ||
| public void testFuncOnColumns2() { | ||
| String sql = "SELECT add(homeRuns,groundedIntoDoublePlays),* FROM baseballStats"; | ||
| PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); | ||
| BaseBrokerRequestHandler.updateColumnNames("baseballStats", pinotQuery, false, COL_MAP); | ||
| List<Expression> newSelections = pinotQuery.getSelectList(); | ||
| Assert.assertTrue(newSelections.get(0).isSetFunctionCall()); | ||
| Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperator(), "ADD"); | ||
| Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), | ||
| "homeRuns"); | ||
| Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperands().get(1).getIdentifier().getName(), | ||
| "groundedIntoDoublePlays"); | ||
| //homeRuns is returned as well | ||
| int homeRunsCnt = 0; | ||
| int groundedIntoDoublePlaysCnt = 0; | ||
| for (Expression selection : newSelections) { | ||
| if (selection.isSetIdentifier() && selection.getIdentifier().getName().equals("homeRuns")) { | ||
| homeRunsCnt++; | ||
| } else if (selection.isSetIdentifier() && selection.getIdentifier().getName().equals("groundedIntoDoublePlays")) { | ||
| groundedIntoDoublePlaysCnt++; | ||
| } | ||
| } | ||
| Assert.assertEquals(homeRunsCnt, 1); | ||
| Assert.assertEquals(groundedIntoDoublePlaysCnt, 1); | ||
| } | ||
|
|
||
| /** | ||
| * When 'n' no. of unqualified * are present, then each column is returned 'n' times. | ||
| */ | ||
| @Test | ||
| public void testMultipleUnqualifiedStars() { | ||
| String sql = "SELECT *,* FROM baseballStats"; | ||
| PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); | ||
| BaseBrokerRequestHandler.updateColumnNames("baseballStats", pinotQuery, false, COL_MAP); | ||
| List<Expression> newSelections = pinotQuery.getSelectList(); | ||
| Assert.assertEquals(newSelections.get(0).getIdentifier().getName(), "G_old"); | ||
| Assert.assertEquals(newSelections.get(1).getIdentifier().getName(), "groundedIntoDoublePlays"); | ||
| Assert.assertEquals(newSelections.get(2).getIdentifier().getName(), "homeRuns"); | ||
| Assert.assertEquals(newSelections.get(3).getIdentifier().getName(), "playerID"); | ||
| Assert.assertEquals(newSelections.get(4).getIdentifier().getName(), "playerStint"); | ||
| Assert.assertEquals(newSelections.get(5).getIdentifier().getName(), "G_old"); | ||
| Assert.assertEquals(newSelections.get(6).getIdentifier().getName(), "groundedIntoDoublePlays"); | ||
| Assert.assertEquals(newSelections.get(7).getIdentifier().getName(), "homeRuns"); | ||
| Assert.assertEquals(newSelections.get(8).getIdentifier().getName(), "playerID"); | ||
| Assert.assertEquals(newSelections.get(9).getIdentifier().getName(), "playerStint"); | ||
| } | ||
|
|
||
| @Test | ||
| public void testAll() { | ||
| String sql = | ||
| "SELECT abs(homeRuns),sqrt(groundedIntoDoublePlays),*,$segmentName,$hostName,playerStint as pstint,playerID " | ||
| + "FROM baseballStats"; | ||
| PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); | ||
| BaseBrokerRequestHandler.updateColumnNames("baseballStats", pinotQuery, false, COL_MAP); | ||
| List<Expression> newSelections = pinotQuery.getSelectList(); | ||
| Assert.assertTrue(newSelections.get(0).isSetFunctionCall()); | ||
| Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperator(), "ABS"); | ||
| Assert.assertEquals(newSelections.get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), | ||
| "homeRuns"); | ||
| Assert.assertTrue(newSelections.get(1).isSetFunctionCall()); | ||
| Assert.assertEquals(newSelections.get(1).getFunctionCall().getOperator(), "SQRT"); | ||
| Assert.assertEquals(newSelections.get(1).getFunctionCall().getOperands().get(0).getIdentifier().getName(), | ||
| "groundedIntoDoublePlays"); | ||
| Assert.assertEquals(newSelections.get(2).getIdentifier().getName(), "G_old"); | ||
| Assert.assertEquals(newSelections.get(3).getIdentifier().getName(), "groundedIntoDoublePlays"); | ||
| Assert.assertEquals(newSelections.get(4).getIdentifier().getName(), "homeRuns"); | ||
| Assert.assertEquals(newSelections.get(5).getIdentifier().getName(), "playerID"); | ||
| Assert.assertEquals(newSelections.get(6).getIdentifier().getName(), "playerStint"); | ||
| Assert.assertEquals(newSelections.get(7).getIdentifier().getName(), "$segmentName"); | ||
| Assert.assertEquals(newSelections.get(8).getIdentifier().getName(), "$hostName"); | ||
| Assert.assertEquals(newSelections.get(9).getFunctionCall().getOperator(), "AS"); | ||
| Assert.assertEquals(newSelections.get(9).getFunctionCall().getOperands().get(0).getIdentifier().getName(), | ||
| "playerStint"); | ||
| Assert.assertEquals(newSelections.get(9).getFunctionCall().getOperands().get(1).getIdentifier().getName(), | ||
| "pstint"); | ||
| Assert.assertEquals(newSelections.get(10).getIdentifier().getName(), "playerID"); | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please also add test cases for:
SELECT sqrt(homeRuns), * FROM baseballStats,There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Tried this with MySQL, it throws an error. With Postgres, it works. IMO if the context is clear enough (in this case we know the table name on which * is being selected), then we should return all the columns without the qualifier like how Postgres is doing it? Calcite doesn't throw a syntax error in this case btw.