Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,23 @@ public void testLiteralOnlyTransformBrokerRequestFromSQL() {
CalciteSqlParser.compileToPinotQuery("SELECT count(*) from foo " + "where bar = fromBase64('aGVsbG8h')")));
Assert.assertFalse(BaseBrokerRequestHandler.isLiteralOnlyQuery(CalciteSqlParser.compileToPinotQuery(
"SELECT count(*) from foo " + "where bar = fromUtf8(fromBase64('aGVsbG8h'))")));
Assert.assertFalse(BaseBrokerRequestHandler.isLiteralOnlyQuery(CalciteSqlParser
.compileToPinotQuery("SELECT count(*) from myTable where regexpReplace(col1, \"b(..)\", \"X$1Y\") = "
+ "\"fooXarYXazY\"")));
Assert.assertFalse(BaseBrokerRequestHandler.isLiteralOnlyQuery(CalciteSqlParser
.compileToPinotQuery("SELECT count(*) from myTable where regexpReplace(col1, \"b(..)\", \"X$1Y\", 10) = "
+ "\"fooXarYXazY\"")));
Assert.assertFalse(BaseBrokerRequestHandler.isLiteralOnlyQuery(CalciteSqlParser
.compileToPinotQuery("SELECT count(*) from myTable where regexpReplace(col1, \"b(..)\", \"X$1Y\", 10 , 1) = "
+ "\"fooXarYXazY\"")));
Assert.assertFalse(BaseBrokerRequestHandler.isLiteralOnlyQuery(CalciteSqlParser
.compileToPinotQuery("SELECT count(*) from myTable where regexpReplace(col1, \"b(..)\", \"X$1Y\", 10 , 1, "
+ "\"i\") = "
+ "\"fooXarYXazY\"")));
Assert.assertFalse(BaseBrokerRequestHandler.isLiteralOnlyQuery(CalciteSqlParser
.compileToPinotQuery("SELECT count(*) from myTable where regexpReplace(col1, \"b(..)\", \"X$1Y\", 10 , 1, "
+ "\"m\") = "
+ "\"fooXarYXazY\"")));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -588,4 +588,111 @@ public static String toBase64(byte[] input) {
public static byte[] fromBase64(String input) {
return Base64.getDecoder().decode(input);
}

/**
* Replace a regular expression pattern. If matchStr is not found, inputStr will be returned. By default, all
* occurences of match pattern in the input string will be replaced. Default matching pattern is case sensitive.
*
* @param inputStr Input string to apply the regexpReplace
* @param matchStr Regexp or string to match against inputStr
* @param replaceStr Regexp or string to replace if matchStr is found
* @param matchStartPos Index of inputStr from where matching should start. Default is 0.
* @param occurence Controls which occurence of the matched pattern must be replaced. Counting starts at 0. Default
* is -1
* @param flag Single character flag that controls how the regex finds matches in inputStr. If an incorrect flag is
* specified, the function applies default case sensitive match. Only one flag can be specified. Supported
* flags:
* i -> Case insensitive
* @return replaced input string
*/
@ScalarFunction
public static String regexpReplace(String inputStr, String matchStr, String replaceStr, int matchStartPos,
int occurence, String flag) {
Integer patternFlag;

// TODO: Support more flags like MULTILINE, COMMENTS, etc.
switch (flag) {
case "i":
patternFlag = Pattern.CASE_INSENSITIVE;
break;
default:
patternFlag = null;
break;
}

Pattern p;
if (patternFlag != null) {
p = Pattern.compile(matchStr, patternFlag);
} else {
p = Pattern.compile(matchStr);
}

Matcher matcher = p.matcher(inputStr).region(matchStartPos, inputStr.length());
StringBuffer sb;

if (occurence >= 0) {
sb = new StringBuffer(inputStr);
while (occurence >= 0 && matcher.find()) {
if (occurence == 0) {
sb.replace(matcher.start(), matcher.end(), replaceStr);
break;
}
occurence--;
}
} else {
sb = new StringBuffer();
while (matcher.find()) {
matcher.appendReplacement(sb, replaceStr);
}
matcher.appendTail(sb);
}

return sb.toString();
}

/**
* See #regexpReplace(String, String, String, int, int, String). Matches against entire inputStr and replaces all
* occurences. Match is performed in case-sensitive mode.
*
* @param inputStr Input string to apply the regexpReplace
* @param matchStr Regexp or string to match against inputStr
* @param replaceStr Regexp or string to replace if matchStr is found
* @return replaced input string
*/
@ScalarFunction
public static String regexpReplace(String inputStr, String matchStr, String replaceStr) {
return regexpReplace(inputStr, matchStr, replaceStr, 0, -1, "");
}

/**
* See #regexpReplace(String, String, String, int, int, String). Matches against entire inputStr and replaces all
* occurences. Match is performed in case-sensitive mode.
*
* @param inputStr Input string to apply the regexpReplace
* @param matchStr Regexp or string to match against inputStr
* @param replaceStr Regexp or string to replace if matchStr is found
* @param matchStartPos Index of inputStr from where matching should start. Default is 0.
* @return replaced input string
*/
@ScalarFunction
public static String regexpReplace(String inputStr, String matchStr, String replaceStr, int matchStartPos) {
return regexpReplace(inputStr, matchStr, replaceStr, matchStartPos, -1, "");
}

/**
* See #regexpReplace(String, String, String, int, int, String). Match is performed in case-sensitive mode.
*
* @param inputStr Input string to apply the regexpReplace
* @param matchStr Regexp or string to match against inputStr
* @param replaceStr Regexp or string to replace if matchStr is found
* @param matchStartPos Index of inputStr from where matching should start. Default is 0.
* @param occurence Controls which occurence of the matched pattern must be replaced. Counting starts
* at 0. Default is -1
* @return replaced input string
*/
@ScalarFunction
public static String regexpReplace(String inputStr, String matchStr, String replaceStr, int matchStartPos,
int occurence) {
return regexpReplace(inputStr, matchStr, replaceStr, matchStartPos, occurence, "");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@
import static org.apache.pinot.controller.helix.core.PinotHelixResourceManager.EXTERNAL_VIEW_CHECK_INTERVAL_MS;
import static org.apache.pinot.controller.helix.core.PinotHelixResourceManager.EXTERNAL_VIEW_ONLINE_SEGMENTS_MAX_WAIT_MS;
import static org.testng.Assert.*;
import static org.testng.Assert.assertEquals;


/**
Expand Down Expand Up @@ -590,6 +591,124 @@ public void testTimeFunc()
assertEquals(oneHourAgoTodayStr, expectedOneHourAgoTodayStr);
}

@Test
public void testRegexpReplace() throws Exception {
// Correctness tests of regexpReplace.

// Test replace all.
String sqlQuery = "SELECT regexpReplace('CA', 'C', 'TEST')";
JsonNode response = postQuery(sqlQuery, _brokerBaseApiUrl);
String result = response.get("resultTable").get("rows").get(0).get(0).asText();
assertEquals(result, "TESTA");

sqlQuery = "SELECT regexpReplace('foobarbaz', 'b', 'X')";
response = postQuery(sqlQuery, _brokerBaseApiUrl);
result = response.get("resultTable").get("rows").get(0).get(0).asText();
assertEquals(result, "fooXarXaz");

sqlQuery = "SELECT regexpReplace('foobarbaz', 'b', 'XY')";
response = postQuery(sqlQuery, _brokerBaseApiUrl);
result = response.get("resultTable").get("rows").get(0).get(0).asText();
assertEquals(result, "fooXYarXYaz");

sqlQuery = "SELECT regexpReplace('Argentina', '(.)', '$1 ')";
response = postQuery(sqlQuery, _brokerBaseApiUrl);
result = response.get("resultTable").get("rows").get(0).get(0).asText();
assertEquals(result, "A r g e n t i n a ");

sqlQuery = "SELECT regexpReplace('Pinot is blazing fast', '( ){2,}', ' ')";
response = postQuery(sqlQuery, _brokerBaseApiUrl);
result = response.get("resultTable").get("rows").get(0).get(0).asText();
assertEquals(result, "Pinot is blazing fast");

sqlQuery = "SELECT regexpReplace('healthy, wealthy, and wise','\\w+thy', 'something')";
response = postQuery(sqlQuery, _brokerBaseApiUrl);
result = response.get("resultTable").get("rows").get(0).get(0).asText();
assertEquals(result, "something, something, and wise");

sqlQuery = "SELECT regexpReplace('11234567898','(\\d)(\\d{3})(\\d{3})(\\d{4})', '$1-($2) $3-$4')";
response = postQuery(sqlQuery, _brokerBaseApiUrl);
result = response.get("resultTable").get("rows").get(0).get(0).asText();
assertEquals(result, "1-(123) 456-7898");

// Test replace starting at index.

sqlQuery = "SELECT regexpReplace('healthy, wealthy, stealthy and wise','\\w+thy', 'something', 4)";
response = postQuery(sqlQuery, _brokerBaseApiUrl);
result = response.get("resultTable").get("rows").get(0).get(0).asText();
assertEquals(result, "healthy, something, something and wise");

sqlQuery = "SELECT regexpReplace('healthy, wealthy, stealthy and wise','\\w+thy', 'something', 1)";
response = postQuery(sqlQuery, _brokerBaseApiUrl);
result = response.get("resultTable").get("rows").get(0).get(0).asText();
assertEquals(result, "hsomething, something, something and wise");


// Test occurence
sqlQuery = "SELECT regexpReplace('healthy, wealthy, stealthy and wise','\\w+thy', 'something', 0, 2)";
response = postQuery(sqlQuery, _brokerBaseApiUrl);
result = response.get("resultTable").get("rows").get(0).get(0).asText();
assertEquals(result, "healthy, wealthy, something and wise");

sqlQuery = "SELECT regexpReplace('healthy, wealthy, stealthy and wise','\\w+thy', 'something', 0, 0)";
response = postQuery(sqlQuery, _brokerBaseApiUrl);
result = response.get("resultTable").get("rows").get(0).get(0).asText();
assertEquals(result, "something, wealthy, stealthy and wise");

// Test flags
sqlQuery = "SELECT regexpReplace('healthy, wealthy, stealthy and wise','\\w+tHy', 'something', 0, 0, 'i')";
response = postQuery(sqlQuery, _brokerBaseApiUrl);
result = response.get("resultTable").get("rows").get(0).get(0).asText();
assertEquals(result, "something, wealthy, stealthy and wise");

// Negative test. Pattern match not found.
sqlQuery = "SELECT regexpReplace('healthy, wealthy, stealthy and wise','\\w+tHy', 'something')";
response = postQuery(sqlQuery, _brokerBaseApiUrl);
result = response.get("resultTable").get("rows").get(0).get(0).asText();
assertEquals(result, "healthy, wealthy, stealthy and wise");

// Negative test. Pattern match not found.
sqlQuery = "SELECT regexpReplace('healthy, wealthy, stealthy and wise','\\w+tHy', 'something', 3, 21, 'i')";
response = postQuery(sqlQuery, _brokerBaseApiUrl);
result = response.get("resultTable").get("rows").get(0).get(0).asText();
assertEquals(result, "healthy, wealthy, stealthy and wise");

// Negative test - incorrect flag
sqlQuery = "SELECT regexpReplace('healthy, wealthy, stealthy and wise','\\w+tHy', 'something', 3, 12, 'xyz')";
response = postQuery(sqlQuery, _brokerBaseApiUrl);
result = response.get("resultTable").get("rows").get(0).get(0).asText();
assertEquals(result, "healthy, wealthy, stealthy and wise");

// Test in select clause with column values
sqlQuery = "SELECT regexpReplace(DestCityName, ' ', '', 0, -1, 'i') from myTable where OriginState = 'CA'";
response = postQuery(sqlQuery, _brokerBaseApiUrl);
JsonNode rows = response.get("resultTable").get("rows");
for (int i = 0; i < rows.size(); i++) {
JsonNode row = rows.get(i);
boolean containsSpace = row.get(0).asText().contains(" ");
assertEquals(containsSpace, false);
}

// Test in where clause
sqlQuery = "SELECT count(*) from myTable where regexpReplace(originState, '[VC]A', 'TEST') = 'TEST'";
response = postQuery(sqlQuery, _brokerBaseApiUrl);
int count1 = response.get("resultTable").get("rows").get(0).get(0).asInt();
sqlQuery = "SELECT count(*) from myTable where originState='CA' or originState='VA'";
response = postQuery(sqlQuery, _brokerBaseApiUrl);
int count2 = response.get("resultTable").get("rows").get(0).get(0).asInt();
assertEquals(count1, count2);

// Test nested transform
sqlQuery = "SELECT count(*) from myTable where contains(regexpReplace(originState, '(C)(A)', '$1TEST$2'), "
+ "'CTESTA')";
response = postQuery(sqlQuery, _brokerBaseApiUrl);
count1 = response.get("resultTable").get("rows").get(0).get(0).asInt();
sqlQuery = "SELECT count(*) from myTable where originState='CA'";
response = postQuery(sqlQuery, _brokerBaseApiUrl);
count2 = response.get("resultTable").get("rows").get(0).get(0).asInt();
assertEquals(count1, count2);
}

@Test
public void testUrlFunc()
throws Exception {
Expand Down