Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.pinot.common.request.context.predicate;

import java.util.Objects;
import java.util.regex.Pattern;
import org.apache.pinot.common.request.context.ExpressionContext;


Expand All @@ -28,6 +29,7 @@
public class RegexpLikePredicate implements Predicate {
private final ExpressionContext _lhs;
private final String _value;
private Pattern _pattern = null;

public RegexpLikePredicate(ExpressionContext lhs, String value) {
_lhs = lhs;
Expand All @@ -48,6 +50,13 @@ public String getValue() {
return _value;
}

public Pattern getPattern() {
if (_pattern == null) {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do this lazily, because compiling the pattern is unnecessary overhead when an FST index is available.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The pattern can be accessed by multiple threads, so it might be better to make the field volatile or to set it with an atomic swap?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't matter: the operation is idempotent, so the worst that can happen is that the pattern gets compiled more than once.

_pattern = Pattern.compile(_value, Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE);
}
return _pattern;
}

@Override
public boolean equals(Object o) {
if (this == o) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import com.google.common.base.Preconditions;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntList;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import org.apache.pinot.common.request.context.predicate.RegexpLikePredicate;
import org.apache.pinot.segment.spi.index.reader.Dictionary;
import org.apache.pinot.spi.data.FieldSpec.DataType;
Expand Down Expand Up @@ -61,22 +61,22 @@ public static BaseRawValueBasedPredicateEvaluator newRawValueBasedEvaluator(Rege
return new RawValueBasedRegexpLikePredicateEvaluator(regexpLikePredicate);
}

private static final int PATTERN_FLAG = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;

private static final class DictionaryBasedRegexpLikePredicateEvaluator extends BaseDictionaryBasedPredicateEvaluator {
final Pattern _pattern;
// Reuse matcher to avoid excessive allocation. This is safe to do because the evaluator is always used
// within the scope of a single thread.
final Matcher _matcher;
final Dictionary _dictionary;
int[] _matchingDictIds;

public DictionaryBasedRegexpLikePredicateEvaluator(RegexpLikePredicate regexpLikePredicate, Dictionary dictionary) {
super(regexpLikePredicate);
_pattern = Pattern.compile(regexpLikePredicate.getValue(), PATTERN_FLAG);
_dictionary = dictionary;
_matcher = regexpLikePredicate.getPattern().matcher("");
}

@Override
public boolean applySV(int dictId) {
return _pattern.matcher(_dictionary.getStringValue(dictId)).find();
return _matcher.reset(_dictionary.getStringValue(dictId)).find();
}

@Override
Expand All @@ -96,11 +96,13 @@ public int[] getMatchingDictIds() {
}

private static final class RawValueBasedRegexpLikePredicateEvaluator extends BaseRawValueBasedPredicateEvaluator {
final Pattern _pattern;
// Reuse matcher to avoid excessive allocation. This is safe to do because the evaluator is always used
// within the scope of a single thread.
final Matcher _matcher;

public RawValueBasedRegexpLikePredicateEvaluator(RegexpLikePredicate regexpLikePredicate) {
super(regexpLikePredicate);
_pattern = Pattern.compile(regexpLikePredicate.getValue(), PATTERN_FLAG);
_matcher = regexpLikePredicate.getPattern().matcher("");
}

@Override
Expand All @@ -110,7 +112,7 @@ public DataType getDataType() {

@Override
public boolean applySV(String value) {
return _pattern.matcher(value).find();
return _matcher.reset(value).find();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ public static void main(String[] args)
private static final String RAW_INT_COL_NAME = "RAW_INT_COL";
private static final String RAW_STRING_COL_NAME = "RAW_STRING_COL";
private static final String NO_INDEX_INT_COL_NAME = "NO_INDEX_INT_COL";
private static final String NO_INDEX_STRING_COL = "NO_INDEX_STRING_COL";

public static final String FILTERED_QUERY = "SELECT SUM(INT_COL) FILTER(WHERE INT_COL > 123 AND INT_COL < 599999),"
+ "MAX(INT_COL) FILTER(WHERE INT_COL > 123 AND INT_COL < 599999) "
Expand All @@ -112,13 +113,16 @@ public static void main(String[] args)
public static final String MULTI_GROUP_BY_WITH_RAW_QUERY_2 = "SELECT RAW_STRING_COL,RAW_INT_COL,INT_COL,COUNT(*) "
+ "FROM MyTable GROUP BY RAW_STRING_COL,RAW_INT_COL,INT_COL";

public static final String NO_INDEX_LIKE_QUERY = "SELECT RAW_INT_COL FROM MyTable "
+ "WHERE NO_INDEX_STRING_COL LIKE '%foo%'";

@Param("1500000")
private int _numRows;
@Param({"EXP(0.001)", "EXP(0.5)", "EXP(0.999)"})
String _scenario;
@Param({
MULTI_GROUP_BY_WITH_RAW_QUERY, MULTI_GROUP_BY_WITH_RAW_QUERY_2, FILTERED_QUERY, NON_FILTERED_QUERY,
SUM_QUERY
SUM_QUERY, NO_INDEX_LIKE_QUERY
})
String _query;
private IndexSegment _indexSegment;
Expand Down Expand Up @@ -164,11 +168,12 @@ private List<GenericRow> createTestData(int numRows) {
List<GenericRow> rows = new ArrayList<>();
for (int i = 0; i < numRows; i++) {
GenericRow row = new GenericRow();
row.putField(INT_COL_NAME, (int) _supplier.getAsLong());
row.putField(NO_INDEX_INT_COL_NAME, (int) _supplier.getAsLong());
row.putField(RAW_INT_COL_NAME, (int) _supplier.getAsLong());
row.putField(RAW_STRING_COL_NAME,
row.putValue(INT_COL_NAME, (int) _supplier.getAsLong());
row.putValue(NO_INDEX_INT_COL_NAME, (int) _supplier.getAsLong());
row.putValue(RAW_INT_COL_NAME, (int) _supplier.getAsLong());
row.putValue(RAW_STRING_COL_NAME,
strings.computeIfAbsent((int) _supplier.getAsLong(), k -> UUID.randomUUID().toString()));
row.putValue(NO_INDEX_STRING_COL, row.getValue(RAW_STRING_COL_NAME));
rows.add(row);
}
return rows;
Expand All @@ -189,6 +194,7 @@ private void buildSegment(String segmentName)
.addSingleValueDimension(RAW_INT_COL_NAME, FieldSpec.DataType.INT)
.addSingleValueDimension(INT_COL_NAME, FieldSpec.DataType.INT)
.addSingleValueDimension(RAW_STRING_COL_NAME, FieldSpec.DataType.STRING)
.addSingleValueDimension(NO_INDEX_STRING_COL, FieldSpec.DataType.STRING)
.build();
SegmentGeneratorConfig config = new SegmentGeneratorConfig(tableConfig, schema);
config.setOutDir(INDEX_DIR.getPath());
Expand Down