-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add broker API to run a query on both query engines and compare results #13746
Changes from 1 commit
3e96471
e286e95
cbf303a
f40583d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33,9 +33,12 @@ | |
import io.swagger.annotations.Authorization; | ||
import io.swagger.annotations.SecurityDefinition; | ||
import io.swagger.annotations.SwaggerDefinition; | ||
import java.util.ArrayList; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Objects; | ||
import java.util.concurrent.CompletableFuture; | ||
import java.util.concurrent.Executor; | ||
import javax.inject.Inject; | ||
import javax.ws.rs.DELETE; | ||
|
@@ -64,13 +67,17 @@ | |
import org.apache.pinot.common.response.PinotBrokerTimeSeriesResponse; | ||
import org.apache.pinot.common.response.broker.BrokerResponseNative; | ||
import org.apache.pinot.common.response.broker.QueryProcessingException; | ||
import org.apache.pinot.common.utils.DataSchema; | ||
import org.apache.pinot.common.utils.request.RequestUtils; | ||
import org.apache.pinot.core.auth.Actions; | ||
import org.apache.pinot.core.auth.Authorize; | ||
import org.apache.pinot.core.auth.ManualAuthorization; | ||
import org.apache.pinot.core.auth.TargetType; | ||
import org.apache.pinot.core.query.executor.sql.SqlQueryExecutor; | ||
import org.apache.pinot.spi.trace.RequestContext; | ||
import org.apache.pinot.core.query.request.context.QueryContext; | ||
import org.apache.pinot.core.query.request.context.utils.QueryContextConverterUtils; | ||
import org.apache.pinot.core.query.request.context.utils.QueryContextUtils; | ||
import org.apache.pinot.spi.trace.RequestScope; | ||
import org.apache.pinot.spi.trace.Tracing; | ||
import org.apache.pinot.spi.utils.CommonConstants.Broker.Request; | ||
|
@@ -280,6 +287,67 @@ public void processTimeSeriesInstantQuery(@Suspended AsyncResponse asyncResponse | |
asyncResponse.resume(Response.ok().entity("{}").build()); | ||
} | ||
|
||
@POST | ||
@Produces(MediaType.APPLICATION_JSON) | ||
@Path("query/compare") | ||
@ApiOperation(value = "Query Pinot using both the single stage query engine and the multi stage query engine and " | ||
+ "compare the results") | ||
@ApiResponses(value = { | ||
@ApiResponse(code = 200, message = "Query result comparison response"), | ||
@ApiResponse(code = 500, message = "Internal Server Error") | ||
}) | ||
@ManualAuthorization | ||
public void processSqlQueryWithBothEnginesAndCompareResults(String query, @Suspended AsyncResponse asyncResponse, | ||
@Context org.glassfish.grizzly.http.server.Request requestContext, | ||
@Context HttpHeaders httpHeaders) { | ||
try { | ||
JsonNode requestJson = JsonUtils.stringToJsonNode(query); | ||
// TODO: Support inputting different v1 and v2 SQL queries | ||
if (!requestJson.has(Request.SQL)) { | ||
throw new IllegalStateException("Payload is missing the query string field 'sql'"); | ||
} | ||
|
||
CompletableFuture<BrokerResponse> v1Response = CompletableFuture.supplyAsync( | ||
() -> { | ||
try { | ||
return executeSqlQuery((ObjectNode) requestJson, makeHttpIdentity(requestContext), | ||
true, httpHeaders, false); | ||
} catch (Exception e) { | ||
throw new RuntimeException(e); | ||
} | ||
}, | ||
_executor | ||
); | ||
|
||
CompletableFuture<BrokerResponse> v2Response = CompletableFuture.supplyAsync( | ||
() -> { | ||
try { | ||
return executeSqlQuery((ObjectNode) requestJson, makeHttpIdentity(requestContext), | ||
true, httpHeaders, true); | ||
} catch (Exception e) { | ||
throw new RuntimeException(e); | ||
} | ||
}, | ||
_executor | ||
); | ||
|
||
CompletableFuture.allOf(v1Response, v2Response).join(); | ||
|
||
asyncResponse.resume(getPinotQueryComparisonResponse(requestJson.get(Request.SQL).asText(), v1Response.get(), | ||
v2Response.get())); | ||
} catch (WebApplicationException wae) { | ||
asyncResponse.resume(wae); | ||
} catch (Exception e) { | ||
LOGGER.error("Caught exception while processing request", e); | ||
asyncResponse.resume( | ||
new WebApplicationException(e, | ||
Response | ||
.status(Response.Status.INTERNAL_SERVER_ERROR) | ||
.entity(e.getMessage()) | ||
.build())); | ||
} | ||
} | ||
|
||
@DELETE | ||
@Path("query/{queryId}") | ||
@Authorize(targetType = TargetType.CLUSTER, action = Actions.Cluster.CANCEL_QUERY) | ||
|
@@ -356,7 +424,7 @@ private BrokerResponse executeSqlQuery(ObjectNode sqlRequestJson, HttpRequesterI | |
PinotSqlType sqlType = sqlNodeAndOptions.getSqlType(); | ||
if (onlyDql && sqlType != PinotSqlType.DQL) { | ||
return new BrokerResponseNative(QueryException.getException(QueryException.SQL_PARSING_ERROR, | ||
new UnsupportedOperationException("Unsupported SQL type - " + sqlType + ", GET API only supports DQL."))); | ||
new UnsupportedOperationException("Unsupported SQL type - " + sqlType + ", this API only supports DQL."))); | ||
} | ||
switch (sqlType) { | ||
case DQL: | ||
|
@@ -428,4 +496,70 @@ static Response getPinotQueryResponse(BrokerResponse brokerResponse) | |
.entity((StreamingOutput) brokerResponse::toOutputStream).type(MediaType.APPLICATION_JSON) | ||
.build(); | ||
} | ||
|
||
static Response getPinotQueryComparisonResponse(String query, BrokerResponse v1Response, BrokerResponse v2Response) { | ||
ObjectNode response = JsonUtils.newObjectNode(); | ||
response.set("v1Response", JsonUtils.objectToJsonNode(v1Response)); | ||
response.set("v2Response", JsonUtils.objectToJsonNode(v2Response)); | ||
response.set("comparisonAnalysis", JsonUtils.objectToJsonNode(analyzeDifferences(query, v1Response, v2Response))); | ||
|
||
return Response.ok() | ||
.header(PINOT_QUERY_ERROR_CODE_HEADER, -1) | ||
.entity(response).type(MediaType.APPLICATION_JSON) | ||
.build(); | ||
} | ||
|
||
private static List<String> analyzeDifferences(String query, BrokerResponse v1Response, BrokerResponse v2Response) { | ||
List<String> differences = new ArrayList<>(); | ||
|
||
if (v1Response.getExceptionsSize() != 0 || v2Response.getExceptionsSize() != 0) { | ||
differences.add("Exception encountered while running the query on one or both query engines"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: Add exception messages to help out the user ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The query response itself contains the entire v1 and v2 responses which will have the exceptions. It seems redundant to duplicate that here? |
||
return differences; | ||
} | ||
|
||
DataSchema.ColumnDataType[] v1ResponseTypes = v1Response.getResultTable().getDataSchema().getColumnDataTypes(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you want to recognise cases where column ordering is different? I dont know if there are cases where the engines may return the result in different column orders. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not aware of any such cases where the column ordering differs in the two engines. However, we do check for column data type mismatches. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. AFAIR any simple select query without group by can return different results, even using the same engine. This should be more frequent if there are several segments involved in the query. In fact we would like to verify order if the query is order by and do not do that in the other case. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Column data type check will identify it. IME, if there is a query with 20 columns, there will be 20 messages in the |
||
DataSchema.ColumnDataType[] v2ResponseTypes = v2Response.getResultTable().getDataSchema().getColumnDataTypes(); | ||
|
||
if (v1ResponseTypes.length != v2ResponseTypes.length) { | ||
differences.add("Mismatch in number of columns returned. v1: " + v1ResponseTypes.length | ||
+ ", v2: " + v2ResponseTypes.length); | ||
return differences; | ||
} | ||
|
||
String[] v1ColumnNames = v1Response.getResultTable().getDataSchema().getColumnNames(); | ||
String[] v2ColumnNames = v2Response.getResultTable().getDataSchema().getColumnNames(); | ||
for (int i = 0; i < v1ResponseTypes.length; i++) { | ||
if (v1ResponseTypes[i] != v2ResponseTypes[i]) { | ||
String columnName = v1ColumnNames[i].equals(v2ColumnNames[i]) | ||
? v1ColumnNames[i] | ||
: v1ColumnNames[i] + " / " + v2ColumnNames[i]; | ||
differences.add("Mismatch in column data type for column with name " + columnName | ||
+ ". v1 type: " + v1ResponseTypes[i] + ", v2 type: " + v2ResponseTypes[i]); | ||
} | ||
} | ||
|
||
if (v1Response.getNumRowsResultSet() != v2Response.getNumRowsResultSet()) { | ||
differences.add("Mismatch in number of rows returned. v1: " + v1Response.getNumRowsResultSet() | ||
+ ", v2: " + v2Response.getNumRowsResultSet()); | ||
return differences; | ||
} | ||
|
||
QueryContext queryContext = QueryContextConverterUtils.getQueryContext(query); | ||
if (QueryContextUtils.isAggregationQuery(queryContext) && queryContext.getGroupByExpressions() == null) { | ||
// Aggregation-only query with a single row output | ||
for (int i = 0; i < v1ColumnNames.length; i++) { | ||
if (!Objects.equals(v1Response.getResultTable().getRows().get(0)[i], | ||
v2Response.getResultTable().getRows().get(0)[i])) { | ||
differences.add("Mismatch in aggregation value for " + v1ColumnNames[i] | ||
+ ". v1 value: " + v1Response.getResultTable().getRows().get(0)[i] | ||
+ ", v2 value: " + v2Response.getResultTable().getRows().get(0)[i]); | ||
} | ||
} | ||
} | ||
|
||
// TODO: Compare response row values if it makes sense for the query type. Handle edge cases with group trimming, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: document as a javadoc? |
||
// non-deterministic results for order by queries with limits etc. | ||
|
||
return differences; | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am using swagger APIs quite a bit today. Setting
sql
vsv1Sql/v2Sql
maybe confusing. Majority of users will only use the doc in the swagger page to use the API. There are a couple of options to reduce confusion:v1Sql
/v2Sql
. Its not that hard to copy paste twice.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point, I've gone with your option 2 since ideally we'd want most single-stage engine queries to work as is on the multi-stage query engine.