Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce LIKE Operator #7214

Merged
merged 14 commits into from
Aug 12, 2021
Prev Previous commit
Next Next commit
Update per comments
  • Loading branch information
atris committed Aug 11, 2021
commit 2d485635bac9b5b3906012052535c2a11bac8169
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,12 @@
import org.apache.pinot.common.request.context.predicate.IsNotNullPredicate;
import org.apache.pinot.common.request.context.predicate.IsNullPredicate;
import org.apache.pinot.common.request.context.predicate.JsonMatchPredicate;
import org.apache.pinot.common.request.context.predicate.LikePredicate;
import org.apache.pinot.common.request.context.predicate.NotEqPredicate;
import org.apache.pinot.common.request.context.predicate.NotInPredicate;
import org.apache.pinot.common.request.context.predicate.RangePredicate;
import org.apache.pinot.common.request.context.predicate.RegexpLikePredicate;
import org.apache.pinot.common.request.context.predicate.TextMatchPredicate;
import org.apache.pinot.common.utils.LikeToRegexFormatConverterUtil;
import org.apache.pinot.common.utils.request.FilterQueryTree;
import org.apache.pinot.pql.parsers.Pql2Compiler;
import org.apache.pinot.pql.parsers.pql2.ast.AstNode;
Expand Down Expand Up @@ -232,7 +232,8 @@ public static FilterContext getFilter(Expression thriftExpression) {
new RegexpLikePredicate(getExpression(operands.get(0)), getStringValue(operands.get(1))));
case LIKE:
return new FilterContext(FilterContext.Type.PREDICATE, null,
new LikePredicate(getExpression(operands.get(0)), getStringValue(operands.get(1))));
new RegexpLikePredicate(getExpression(operands.get(0)),
LikeToRegexFormatConverterUtil.processValue(getStringValue(operands.get(1)))));
case TEXT_MATCH:
return new FilterContext(FilterContext.Type.PREDICATE, null,
new TextMatchPredicate(getExpression(operands.get(0)), getStringValue(operands.get(1))));
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package org.apache.pinot.common.utils;

/**
 * Utility for converting a SQL LIKE pattern into an equivalent regular expression.
 *
 * <p>The LIKE wildcards are translated as follows:
 * <ul>
 *   <li>{@code %} becomes {@code .*} (any sequence of characters)</li>
 *   <li>{@code _} becomes {@code .} (exactly one character)</li>
 * </ul>
 * Every character listed in {@link #REGEXP_METACHARACTERS}, as well as {@code .}, is prefixed with a
 * backslash so that it matches itself instead of acting as a regex operator.
 */
public class LikeToRegexFormatConverterUtil {
  /* Represents all metacharacters to be processed */
  public static final String[] REGEXP_METACHARACTERS = {"\\", "^", "$", "{", "}", "[", "]", "(", ")",
      "*", "+", "?", "|", "<", ">", "-", "&"};

  /**
   * Process an incoming LIKE string and make it regexp friendly.
   *
   * <p>The pattern is converted in a single left-to-right pass so that characters produced by one
   * rewrite step can never be rewritten again by a later step. (The previous multi-step
   * implementation escaped {@code ?} to {@code \?} and then turned that into {@code \.}, i.e. a
   * literal dot.)
   *
   * <p>A {@code %} or {@code _} that is immediately preceded by a backslash in the input is not
   * expanded. The backslash itself is still emitted as an escaped literal, so the input {@code \%}
   * yields the regex {@code \\%}.
   *
   * @param value LIKE operator styled predicate
   * @return Result regex
   * @throws NullPointerException if {@code value} is null
   */
  public static String processValue(String value) {
    // Worst case every character needs a one-character escape prefix.
    StringBuilder regex = new StringBuilder(value.length() * 2);
    boolean afterBackslash = false;

    for (int i = 0; i < value.length(); i++) {
      char current = value.charAt(i);
      if (current == '%' || current == '_') {
        if (afterBackslash) {
          // Backslash-prefixed wildcard: keep the character as-is, do not expand it.
          regex.append(current);
        } else {
          regex.append(current == '%' ? ".*" : ".");
        }
      } else if (current == '.' || isMetaCharacter(current)) {
        regex.append('\\').append(current);
      } else {
        regex.append(current);
      }
      afterBackslash = current == '\\';
    }
    return regex.toString();
  }

  /**
   * Tells whether the given character is one of the single-character {@link #REGEXP_METACHARACTERS}.
   */
  private static boolean isMetaCharacter(char candidate) {
    for (String metaCharacter : REGEXP_METACHARACTERS) {
      if (metaCharacter.length() == 1 && metaCharacter.charAt(0) == candidate) {
        return true;
      }
    }
    return false;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package org.apache.pinot.util;

import org.apache.pinot.common.utils.LikeToRegexFormatConverterUtil;
import org.testng.annotations.Test;


/**
 * Tests for {@link LikeToRegexFormatConverterUtil}
 */
public class TestLikeSyntaxConverter {

  private static final String TRAILING_WILDCARD = "C+%";
  private static final String LEADING_WILDCARD = "%++";
  private static final String BOTH_SIDES_WILDCARD = "%+%";

  /** A leading {@code %} must expand to {@code .*} and each {@code +} must be escaped. */
  @Test
  public void testLeadingWildcard() {
    assertConvertsTo(LEADING_WILDCARD, ".*\\+\\+");
  }

  /** A trailing {@code %} must expand to {@code .*} and the {@code +} before it must be escaped. */
  @Test
  public void testTrailingWildcard() {
    assertConvertsTo(TRAILING_WILDCARD, "C\\+.*");
  }

  /** Both {@code %} wildcards must expand to {@code .*} around the escaped {@code +}. */
  @Test
  public void testBothSidesWildcard() {
    assertConvertsTo(BOTH_SIDES_WILDCARD, ".*\\+.*");
  }

  /**
   * Converts the LIKE pattern and fails the test when the result differs from the expected regex.
   *
   * <p>An explicit {@link AssertionError} is thrown instead of using the {@code assert} keyword,
   * because {@code assert} statements are skipped when the JVM runs with assertions disabled, which
   * would let these tests pass without checking anything.
   */
  private static void assertConvertsTo(String likePattern, String expectedRegex) {
    String actualRegex = LikeToRegexFormatConverterUtil.processValue(likePattern);
    if (!expectedRegex.equals(actualRegex)) {
      throw new AssertionError(
          "LIKE pattern '" + likePattern + "' was converted to '" + actualRegex + "' but expected '"
              + expectedRegex + "'");
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import javax.annotation.Nullable;
import org.apache.pinot.common.request.context.predicate.EqPredicate;
import org.apache.pinot.common.request.context.predicate.InPredicate;
import org.apache.pinot.common.request.context.predicate.LikePredicate;
import org.apache.pinot.common.request.context.predicate.NotEqPredicate;
import org.apache.pinot.common.request.context.predicate.NotInPredicate;
import org.apache.pinot.common.request.context.predicate.Predicate;
Expand Down Expand Up @@ -58,8 +57,6 @@ public static PredicateEvaluator getPredicateEvaluator(Predicate predicate, @Nul
return RangePredicateEvaluatorFactory
.newDictionaryBasedEvaluator((RangePredicate) predicate, dictionary, dataType);
case LIKE:
return RegexpLikePredicateEvaluatorFactory
.newRawValueBasedEvaluator((LikePredicate) predicate, dataType);
case REGEXP_LIKE:
return RegexpLikePredicateEvaluatorFactory
.newDictionaryBasedEvaluator((RegexpLikePredicate) predicate, dictionary, dataType);
Expand All @@ -79,12 +76,10 @@ public static PredicateEvaluator getPredicateEvaluator(Predicate predicate, @Nul
return NotInPredicateEvaluatorFactory.newRawValueBasedEvaluator((NotInPredicate) predicate, dataType);
case RANGE:
return RangePredicateEvaluatorFactory.newRawValueBasedEvaluator((RangePredicate) predicate, dataType);
case LIKE:
case REGEXP_LIKE:
return RegexpLikePredicateEvaluatorFactory
.newRawValueBasedEvaluator((RegexpLikePredicate) predicate, dataType);
case LIKE:
return RegexpLikePredicateEvaluatorFactory
.newRawValueBasedEvaluator((LikePredicate) predicate, dataType);
default:
throw new UnsupportedOperationException("Unsupported predicate type: " + predicate.getType());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
import org.apache.pinot.common.request.context.FilterContext;
import org.apache.pinot.common.request.context.FunctionContext;
import org.apache.pinot.common.request.context.predicate.JsonMatchPredicate;
import org.apache.pinot.common.request.context.predicate.LikePredicate;
import org.apache.pinot.common.request.context.predicate.Predicate;
import org.apache.pinot.common.request.context.predicate.RegexpLikePredicate;
import org.apache.pinot.common.request.context.predicate.TextMatchPredicate;
Expand Down Expand Up @@ -175,24 +174,10 @@ private BaseFilterOperator constructPhysicalOperator(FilterContext filter,
String column = lhs.getIdentifier();
DataSource dataSource = _indexSegment.getDataSource(column);
switch (predicate.getType()) {
case LIKE:
PredicateEvaluator evaluator;
if (dataSource.getFSTIndex() != null) {
evaluator = FSTBasedRegexpPredicateEvaluatorFactory
.newFSTBasedEvaluator(dataSource.getFSTIndex(), dataSource.getDictionary(),
((LikePredicate) predicate).getValue());
} else if (dataSource instanceof MutableDataSource && ((MutableDataSource) dataSource).isFSTEnabled()) {
evaluator = FSTBasedRegexpPredicateEvaluatorFactory
.newAutomatonBasedEvaluator(dataSource.getDictionary(),
((LikePredicate) predicate).getValue());
} else {
evaluator = PredicateEvaluatorProvider.getPredicateEvaluator(predicate, dataSource.getDictionary(),
dataSource.getDataSourceMetadata().getDataType());
}
return FilterOperatorUtils.getLeafFilterOperator(evaluator, dataSource, _numDocs);
case TEXT_MATCH:
return new TextMatchFilterOperator(dataSource.getTextIndex(), ((TextMatchPredicate) predicate).getValue(),
_numDocs);
case LIKE:
case REGEXP_LIKE:
// FST Index is available only for rolled out segments. So, we use different evaluator for rolled out and
// consuming segments.
Expand All @@ -202,6 +187,7 @@ private BaseFilterOperator constructPhysicalOperator(FilterContext filter,
//
// Consuming segments: When FST is enabled, use AutomatonBasedEvaluator so that regexp matching logic is
// similar to that of FSTBasedEvaluator, else use regular flow of getting predicate evaluator.
PredicateEvaluator evaluator;
if (dataSource.getFSTIndex() != null) {
evaluator = FSTBasedRegexpPredicateEvaluatorFactory
.newFSTBasedEvaluator(dataSource.getFSTIndex(), dataSource.getDictionary(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ private void testInterSegmentSelectionQueryHelper(String query, int expectedResu

private void testSelectionResults(String query, int expectedResultSize, List<Serializable[]> expectedResults)
throws Exception {
Operator<IntermediateResultsBlock> operator = getOperatorForPqlQuery(query);
Operator<IntermediateResultsBlock> operator = getOperatorForSqlQuery(query);
IntermediateResultsBlock operatorResult = operator.nextBlock();
List<Object[]> resultset = (List<Object[]>) operatorResult.getSelectionResult();
Assert.assertNotNull(resultset);
Expand Down Expand Up @@ -344,6 +344,22 @@ public void testFSTBasedRegexLike()
testSelectionResults(query, 5, null);
}

/**
 * Runs several LIKE queries against the DOMAIN_NAMES column and checks the number of rows each
 * one selects. The expected rows themselves are not compared (null is passed for them).
 */
@Test
public void testLikeOperator()
    throws Exception {
  // Pattern with a trailing wildcard.
  testSelectionResults(
      "SELECT INT_COL, URL_COL FROM MyTable WHERE DOMAIN_NAMES LIKE 'www.domain1%' LIMIT 50000", 256, null);

  // Pattern with a trailing wildcard and an extra dotted segment.
  testSelectionResults(
      "SELECT INT_COL, URL_COL FROM MyTable WHERE DOMAIN_NAMES LIKE 'www.sd.domain1%' LIMIT 50000", 256, null);

  // Pattern with wildcards on both sides.
  testSelectionResults(
      "SELECT INT_COL, URL_COL FROM MyTable WHERE DOMAIN_NAMES LIKE '%domain1%' LIMIT 50000", 512, null);

  // Pattern with a leading wildcard.
  testSelectionResults(
      "SELECT INT_COL, URL_COL FROM MyTable WHERE DOMAIN_NAMES LIKE '%com' LIMIT 50000", 256, null);
}

@Test
public void testFSTBasedRegexpLikeWithOtherFilters()
throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1199,26 +1199,6 @@ public void testLuceneRealtimeWithSearcherManager()
indexWriter.close();
}

/**
 * Runs several LIKE queries against the SKILLS_TEXT_COL column through the text-search select
 * helper and checks the number of rows each one selects.
 */
@Test
public void testLikeOperator()
    throws Exception {
  // Pattern without any wildcard.
  testTextSearchSelectQueryHelper(
      "SELECT INT_COL, SKILLS_TEXT_COL FROM MyTable WHERE SKILLS_TEXT_COL LIKE 'C++' LIMIT 50000",
      9, false, null);

  // Pattern with a trailing wildcard.
  testTextSearchSelectQueryHelper(
      "SELECT INT_COL, SKILLS_TEXT_COL FROM MyTable WHERE SKILLS_TEXT_COL LIKE 'C+%' LIMIT 50000",
      9, false, null);

  // Pattern with wildcards on both sides.
  testTextSearchSelectQueryHelper(
      "SELECT INT_COL, SKILLS_TEXT_COL FROM MyTable WHERE SKILLS_TEXT_COL LIKE '%Apa%' LIMIT 50000",
      6, false, null);

  // Two LIKE predicates combined with AND.
  testTextSearchSelectQueryHelper(
      "SELECT INT_COL, SKILLS_TEXT_COL FROM MyTable WHERE SKILLS_TEXT_COL LIKE 'Machine learning' AND SKILLS_TEXT_COL LIKE 'gpu' LIMIT 50000",
      2, false, null);
}

/**
* Test the realtime search by verifying that realtime reader is able
* to see monotonically increasing number of uncommitted documents
Expand Down