Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@

import com.fasterxml.jackson.databind.JsonNode;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import org.apache.pinot.broker.broker.AccessControlFactory;
import org.apache.pinot.broker.broker.AllowAllAccessControlFactory;
import org.apache.pinot.common.metrics.BrokerMetrics;
import org.apache.pinot.common.response.broker.BrokerResponseNative;
import org.apache.pinot.common.response.broker.ResultTable;
import org.apache.pinot.common.utils.DataSchema;
import org.apache.pinot.spi.env.PinotConfiguration;
import org.apache.pinot.spi.metrics.PinotMetricUtils;
Expand Down Expand Up @@ -104,6 +106,22 @@ public void testLiteralOnlyTransformBrokerRequestFromSQL() {
Assert.assertFalse(BaseBrokerRequestHandler.isLiteralOnlyQuery(CalciteSqlParser
.compileToPinotQuery("SELECT count(*) from foo "
+ "where bar = decodeUrl('key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253')")));
Assert.assertTrue(BaseBrokerRequestHandler.isLiteralOnlyQuery(CalciteSqlParser.compileToPinotQuery(
"SELECT toUtf8('hello!')," + " fromUtf8(toUtf8('hello!')) FROM myTable")));
Assert.assertFalse(BaseBrokerRequestHandler.isLiteralOnlyQuery(CalciteSqlParser.compileToPinotQuery(
"SELECT reverse(fromUtf8(foo))," + " toUtf8('hello!') FROM myTable")));
Assert.assertTrue(BaseBrokerRequestHandler.isLiteralOnlyQuery(CalciteSqlParser.compileToPinotQuery(
"SELECT toBase64(toUtf8('hello!'))," + " fromBase64('aGVsbG8h') FROM myTable")));
Assert.assertFalse(BaseBrokerRequestHandler.isLiteralOnlyQuery(CalciteSqlParser.compileToPinotQuery(
"SELECT reverse(toBase64(foo))," + " toBase64(fromBase64('aGVsbG8h')) FROM myTable")));
Assert.assertFalse(BaseBrokerRequestHandler.isLiteralOnlyQuery(
CalciteSqlParser.compileToPinotQuery("SELECT fromBase64(toBase64(to_utf8(foo))) FROM myTable")));
Assert.assertFalse(BaseBrokerRequestHandler.isLiteralOnlyQuery(
CalciteSqlParser.compileToPinotQuery("SELECT count(*) from foo " + "where bar = toBase64(toASCII('hello!'))")));
Assert.assertFalse(BaseBrokerRequestHandler.isLiteralOnlyQuery(
CalciteSqlParser.compileToPinotQuery("SELECT count(*) from foo " + "where bar = fromBase64('aGVsbG8h')")));
Assert.assertFalse(BaseBrokerRequestHandler.isLiteralOnlyQuery(CalciteSqlParser.compileToPinotQuery(
"SELECT count(*) from foo " + "where bar = fromUtf8(fromBase64('aGVsbG8h'))")));
}

@Test
Expand All @@ -115,6 +133,10 @@ public void testLiteralOnlyWithAsBrokerRequestFromSQL() {
Assert.assertTrue(BaseBrokerRequestHandler.isLiteralOnlyQuery(CalciteSqlParser.compileToPinotQuery(
"SELECT encodeUrl('key1=value 1&key2=value@!$2&key3=value%3') AS encoded, "
+ "decodeUrl('key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253') AS decoded")));
Assert.assertTrue(BaseBrokerRequestHandler.isLiteralOnlyQuery(CalciteSqlParser.compileToPinotQuery(
"SELECT toUtf8('hello!') AS encoded, " + "fromUtf8(toUtf8('hello!')) AS decoded")));
Assert.assertTrue(BaseBrokerRequestHandler.isLiteralOnlyQuery(CalciteSqlParser.compileToPinotQuery(
"SELECT toBase64(toUtf8('hello!')) AS encoded, " + "fromBase64('aGVsbG8h') AS decoded")));
}

@Test
Expand Down Expand Up @@ -211,6 +233,76 @@ public void testBrokerRequestHandlerWithAsFunction()
Assert.assertEquals(brokerResponse.getResultTable().getRows().get(0)[1].toString(),
"key1=value 1&key2=value@!$2&key3=value%3");
Assert.assertEquals(brokerResponse.getTotalDocs(), 0);

request = JsonUtils.stringToJsonNode(
"{\"sql\":\"SELECT toBase64(toUtf8('hello!')) AS encoded, " + "fromUtf8(fromBase64('aGVsbG8h')) AS decoded\"}");
requestStats = Tracing.getTracer().createRequestScope();
brokerResponse = requestHandler.handleRequest(request, null, requestStats);
ResultTable resultTable = brokerResponse.getResultTable();
DataSchema dataSchema = resultTable.getDataSchema();
List<Object[]> rows = resultTable.getRows();
Assert.assertEquals(dataSchema.getColumnName(0), "encoded");
Assert.assertEquals(dataSchema.getColumnDataType(0), DataSchema.ColumnDataType.STRING);
Assert.assertEquals(dataSchema.getColumnName(1), "decoded");
Assert.assertEquals(dataSchema.getColumnDataType(1), DataSchema.ColumnDataType.STRING);
Assert.assertEquals(rows.size(), 1);
Assert.assertEquals(rows.get(0).length, 2);
Assert.assertEquals(rows.get(0)[0].toString(), "aGVsbG8h");
Assert.assertEquals(rows.get(0)[1].toString(), "hello!");
Assert.assertEquals(brokerResponse.getTotalDocs(), 0);

request = JsonUtils.stringToJsonNode(
"{\"sql\":\"SELECT fromUtf8(fromBase64(toBase64(toUtf8('nested')))) AS output\"}");
requestStats = Tracing.getTracer().createRequestScope();
brokerResponse = requestHandler.handleRequest(request, null, requestStats);
resultTable = brokerResponse.getResultTable();
dataSchema = resultTable.getDataSchema();
rows = resultTable.getRows();
Assert.assertEquals(dataSchema.getColumnName(0), "output");
Assert.assertEquals(dataSchema.getColumnDataType(0), DataSchema.ColumnDataType.STRING);
Assert.assertEquals(rows.size(), 1);
Assert.assertEquals(rows.get(0).length, 1);
Assert.assertEquals(rows.get(0)[0].toString(), "nested");
Assert.assertEquals(brokerResponse.getTotalDocs(), 0);

request = JsonUtils.stringToJsonNode(
"{\"sql\":\"SELECT toBase64(toUtf8('this is a long string that will encode to more than 76 characters using "
+ "base64'))"
+ " AS encoded\"}");
requestStats = Tracing.getTracer().createRequestScope();
brokerResponse = requestHandler.handleRequest(request, null, requestStats);
resultTable = brokerResponse.getResultTable();
dataSchema = resultTable.getDataSchema();
rows = resultTable.getRows();
Assert.assertEquals(dataSchema.getColumnName(0), "encoded");
Assert.assertEquals(dataSchema.getColumnDataType(0), DataSchema.ColumnDataType.STRING);
Assert.assertEquals(rows.size(), 1);
Assert.assertEquals(rows.get(0).length, 1);
Assert.assertEquals(rows.get(0)[0].toString(),
"dGhpcyBpcyBhIGxvbmcgc3RyaW5nIHRoYXQgd2lsbCBlbmNvZGUgdG8gbW9yZSB0aGFuIDc2IGNoYXJhY3RlcnMgdXNpbmcgYmFzZTY0");
Assert.assertEquals(brokerResponse.getTotalDocs(), 0);

request = JsonUtils.stringToJsonNode("{\"sql\":\"SELECT fromUtf8(fromBase64"
+ "('dGhpcyBpcyBhIGxvbmcgc3RyaW5nIHRoYXQgd2lsbCBlbmNvZGUgdG8gbW9yZSB0aGFuIDc2IGNoYXJhY3RlcnMgdXNpbmcgYmFzZTY0"
+ "')) AS decoded\"}");
requestStats = Tracing.getTracer().createRequestScope();
brokerResponse = requestHandler.handleRequest(request, null, requestStats);
resultTable = brokerResponse.getResultTable();
dataSchema = resultTable.getDataSchema();
rows = resultTable.getRows();
Assert.assertEquals(dataSchema.getColumnName(0), "decoded");
Assert.assertEquals(dataSchema.getColumnDataType(0), DataSchema.ColumnDataType.STRING);
Assert.assertEquals(rows.size(), 1);
Assert.assertEquals(rows.get(0).length, 1);
Assert.assertEquals(rows.get(0)[0].toString(),
"this is a long string that will encode to more than 76 characters using base64");
Assert.assertEquals(brokerResponse.getTotalDocs(), 0);

request = JsonUtils.stringToJsonNode("{\"sql\":\"SELECT fromBase64" + "(0) AS decoded\"}");
requestStats = Tracing.getTracer().createRequestScope();
brokerResponse = requestHandler.handleRequest(request, null, requestStats);
Assert.assertTrue(
brokerResponse.getProcessingExceptions().get(0).getMessage().contains("IllegalArgumentException"));
}

/** Tests for EXPLAIN PLAN for literal only queries. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.text.Normalizer;
import java.util.Base64;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
Expand Down Expand Up @@ -393,6 +394,15 @@ public static byte[] toUtf8(String input) {
return input.getBytes(StandardCharsets.UTF_8);
}

/**
* @param input bytes
* @return UTF8 encoded string
*/
@ScalarFunction
public static String fromUtf8(byte[] input) {
return new String(input, StandardCharsets.UTF_8);
}

/**
* @see StandardCharsets#US_ASCII#encode(String)
* @param input
Expand Down Expand Up @@ -560,4 +570,22 @@ public static String decodeUrl(String input)
throws UnsupportedEncodingException {
return URLDecoder.decode(input, StandardCharsets.UTF_8.toString());
}

/**
* @param input binary data
* @return Base64 encoded String
*/
@ScalarFunction
public static String toBase64(byte[] input) {
return Base64.getEncoder().encodeToString(input);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the expectation on charset of input bytes / binary data ? Looks like we assume it is UTF8. What if the input bytes provided here was encoded using UTF16

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Resolved - we will not have a fixed charset of input bytes now with the new implementation. For example, if user wants to encode the String "hello!", he/she can choose to do toBase64(toUtf8("hello")) or toBase64(toUtf16("hello")) (Pinot currently only supports toUtf8 and toASCII functions). During decoding, he/she can do fromUtf8(fromBase64("fvduivheui")) to fromUtf16(fromBase64("fvduivheui")) to get "hello!" depending on if they used UTF8 or UTF16 when they encoded.

Copy link
Contributor

@jasperjiaguo jasperjiaguo Aug 2, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we error if the user passes a string to the toBase64 function, or some implicit type casting will happen? MySQL users might not expect the syntax of toBase64(toUtf8("hello")). Similarly, if the user calls on fromBase64('aGVsbG8h'), they might not expecting us to return hex string. We should at least call this out in the documentation.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Resolved - added tests for calling toBase64 on a string. Will add in documentation that BYTES col will be represented in HEX string(also link:https://docs.pinot.apache.org/users/user-guide-query/querying-pinot#bytes-column).

}

/**
* @param input Base64 encoded String
* @return decoded binary data
*/
@ScalarFunction
public static byte[] fromBase64(String input) {
return Base64.getDecoder().decode(input);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1890,6 +1890,68 @@ public void testCompilationInvokedFunction() {
decoded = and.getOperands().get(1).getFunctionCall().getOperands().get(1).getLiteral().getStringValue();
Assert.assertEquals(encoded, "key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253");
Assert.assertEquals(decoded, "key1=value 1&key2=value@!$2&key3=value%3");

query = "select toBase64(toUtf8('hello!')), fromUtf8(fromBase64('aGVsbG8h')) from mytable";
pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
String encodedBase64 = pinotQuery.getSelectList().get(0).getLiteral().getStringValue();
String decodedBase64 = pinotQuery.getSelectList().get(1).getLiteral().getStringValue();
Assert.assertEquals(encodedBase64, "aGVsbG8h");
Assert.assertEquals(decodedBase64, "hello!");

query = "select toBase64(fromBase64('aGVsbG8h')), fromUtf8(fromBase64(toBase64(toUtf8('hello!')))) from mytable";
pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
encodedBase64 = pinotQuery.getSelectList().get(0).getLiteral().getStringValue();
decodedBase64 = pinotQuery.getSelectList().get(1).getLiteral().getStringValue();
Assert.assertEquals(encodedBase64, "aGVsbG8h");
Assert.assertEquals(decodedBase64, "hello!");

query =
"select toBase64(toUtf8(upper('hello!'))), fromUtf8(fromBase64(toBase64(toUtf8(upper('hello!'))))) from "
+ "mytable";
pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
encodedBase64 = pinotQuery.getSelectList().get(0).getLiteral().getStringValue();
decodedBase64 = pinotQuery.getSelectList().get(1).getLiteral().getStringValue();
Assert.assertEquals(encodedBase64, "SEVMTE8h");
Assert.assertEquals(decodedBase64, "HELLO!");

query =
"select reverse(fromUtf8(fromBase64(toBase64(toUtf8(upper('hello!')))))) from mytable where fromUtf8"
+ "(fromBase64(toBase64(toUtf8(upper('hello!')))))"
+ " = bar";
pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
String arg1 = pinotQuery.getSelectList().get(0).getLiteral().getStringValue();
String leftOp =
pinotQuery.getFilterExpression().getFunctionCall().getOperands().get(1).getLiteral().getStringValue();
Assert.assertEquals(arg1, "!OLLEH");
Assert.assertEquals(leftOp, "HELLO!");

query = "select a from mytable where foo = toBase64(toUtf8('hello!')) and bar = fromUtf8(fromBase64('aGVsbG8h'))";
pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
and = pinotQuery.getFilterExpression().getFunctionCall();
encoded = and.getOperands().get(0).getFunctionCall().getOperands().get(1).getLiteral().getStringValue();
decoded = and.getOperands().get(1).getFunctionCall().getOperands().get(1).getLiteral().getStringValue();
Assert.assertEquals(encoded, "aGVsbG8h");
Assert.assertEquals(decoded, "hello!");

query = "select fromBase64('hello') from mytable";
Exception expectedError = null;
try {
CalciteSqlParser.compileToPinotQuery(query);
} catch (Exception e) {
expectedError = e;
}
Assert.assertNotNull(expectedError);
Assert.assertTrue(expectedError instanceof SqlCompilationException);

query = "select toBase64('hello!') from mytable";
expectedError = null;
try {
CalciteSqlParser.compileToPinotQuery(query);
} catch (Exception e) {
expectedError = e;
}
Assert.assertNotNull(expectedError);
Assert.assertTrue(expectedError instanceof SqlCompilationException);
}

@Test
Expand Down Expand Up @@ -2012,6 +2074,40 @@ public void testCompileTimeExpression() {
Assert.assertNotNull(expression.getFunctionCall());
Assert.assertEquals(expression.getFunctionCall().getOperator(), "count");
Assert.assertEquals(expression.getFunctionCall().getOperands().get(0).getIdentifier().getName(), "*");

expression = CalciteSqlParser.compileToExpression("toBase64(toUtf8('hello!'))");
Assert.assertNotNull(expression.getFunctionCall());
pinotQuery.setFilterExpression(expression);
pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery);
expression = pinotQuery.getFilterExpression();
Assert.assertNotNull(expression.getLiteral());
Assert.assertEquals(expression.getLiteral().getFieldValue(), "aGVsbG8h");

expression = CalciteSqlParser.compileToExpression("fromUtf8(fromBase64('aGVsbG8h'))");
Assert.assertNotNull(expression.getFunctionCall());
pinotQuery.setFilterExpression(expression);
pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery);
expression = pinotQuery.getFilterExpression();
Assert.assertNotNull(expression.getLiteral());
Assert.assertEquals(expression.getLiteral().getFieldValue(), "hello!");

expression = CalciteSqlParser.compileToExpression("fromBase64(foo)");
Assert.assertNotNull(expression.getFunctionCall());
pinotQuery.setFilterExpression(expression);
pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery);
expression = pinotQuery.getFilterExpression();
Assert.assertNotNull(expression.getFunctionCall());
Assert.assertEquals(expression.getFunctionCall().getOperator(), "frombase64");
Assert.assertEquals(expression.getFunctionCall().getOperands().get(0).getIdentifier().getName(), "foo");

expression = CalciteSqlParser.compileToExpression("toBase64(foo)");
Assert.assertNotNull(expression.getFunctionCall());
pinotQuery.setFilterExpression(expression);
pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery);
expression = pinotQuery.getFilterExpression();
Assert.assertNotNull(expression.getFunctionCall());
Assert.assertEquals(expression.getFunctionCall().getOperator(), "tobase64");
Assert.assertEquals(expression.getFunctionCall().getOperands().get(0).getIdentifier().getName(), "foo");
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.nio.charset.StandardCharsets;
import java.text.Normalizer;
import java.util.Arrays;
import java.util.Base64;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
Expand Down Expand Up @@ -819,4 +820,27 @@ public void testConcatStringTransformFunction() {
}
testTransformFunctionMV(transformFunction, expectedValues);
}

@Test
public void testBase64TransformFunction() {
ExpressionContext expression = RequestContextUtils.getExpression(String.format("toBase64(%s)", BYTES_SV_COLUMN));
TransformFunction transformFunction = TransformFunctionFactory.get(expression, _dataSourceMap);
assertTrue(transformFunction instanceof ScalarTransformFunctionWrapper);
assertEquals(transformFunction.getName(), "toBase64");
String[] expectedValues = new String[NUM_ROWS];
for (int i = 0; i < NUM_ROWS; i++) {
expectedValues[i] = Base64.getEncoder().encodeToString(_bytesSVValues[i]);
}
testTransformFunction(transformFunction, expectedValues);

expression = RequestContextUtils.getExpression(String.format("fromBase64(toBase64(%s))", BYTES_SV_COLUMN));
transformFunction = TransformFunctionFactory.get(expression, _dataSourceMap);
assertTrue(transformFunction instanceof ScalarTransformFunctionWrapper);
assertEquals(transformFunction.getName(), "fromBase64");
byte[][] expectedBinaryValues = new byte[NUM_ROWS][];
for (int i = 0; i < NUM_ROWS; i++) {
expectedBinaryValues[i] = Base64.getDecoder().decode(Base64.getEncoder().encodeToString(_bytesSVValues[i]));
}
testTransformFunction(transformFunction, expectedBinaryValues);
}
}
Loading