Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
e336a90
Add regexp support
Mar 1, 2024
e25b479
Json index based extraction
Mar 1, 2024
56bc02a
Ordering fix
Mar 1, 2024
6ba201c
Add range filter
Mar 1, 2024
d1abd9b
Lint
Mar 1, 2024
444ee8d
Fix tests
Mar 2, 2024
f507797
Delay materialization until transform call
Mar 3, 2024
579661f
Extract literal type in range predicate
Mar 3, 2024
a5ed67d
Fix comparison
Mar 3, 2024
1db872c
Fix tests
Mar 3, 2024
73c40ee
Fix ordering
Mar 3, 2024
900ee14
Remove todo
Mar 4, 2024
6fb5066
Add support for null default value
Mar 4, 2024
7f164f5
Fixes
Mar 4, 2024
7249c71
Add comments
Mar 4, 2024
f139de0
Add tests
Mar 4, 2024
03007af
Remove rangeDataType from equality
Mar 4, 2024
1124a51
Add ITs
Mar 5, 2024
4bf276f
Merge branch 'master' of github.com:apache/pinot into jsonExtractIndexMv
Mar 8, 2024
f585f54
Lint
Mar 8, 2024
cf52952
Merge branch 'master' of github.com:apache/pinot into jsonExtractIndexMv
Mar 15, 2024
3ba6ba2
Merge branch 'master' of github.com:apache/pinot into jsonExtractIndexMv
Mar 18, 2024
f4a9129
Merge branch 'master' of github.com:apache/pinot into jsonExtractIndexMv
Mar 20, 2024
7d83f24
Consolidate with JsonExtractIndex
Mar 20, 2024
dcd8ca7
Lint
Mar 20, 2024
a7ec601
Lint
Mar 20, 2024
37d66a4
Move filter logic to new PR
Mar 21, 2024
e5a64bf
Lint
Mar 21, 2024
e4a717e
Add tests
Mar 21, 2024
de04d1a
Review comments
Mar 21, 2024
b695a60
Merge branch 'jsonExtractIndexMv' of github.com:saurabhd336/pinot int…
Mar 21, 2024
24dd429
Lint
Mar 21, 2024
c782fc0
Review comments
Mar 21, 2024
01b8280
Remove visibleForTesting
Mar 21, 2024
ffdaa92
Merge branch 'jsonExtractIndexMv' of github.com:saurabhd336/pinot int…
Mar 21, 2024
c923be1
Merge branch 'master' of github.com:apache/pinot into jsonExtractInde…
Mar 21, 2024
de51157
Lint
Mar 21, 2024
103967c
Merge branch 'master' of github.com:apache/pinot into jsonExtractInde…
Mar 22, 2024
8e45c63
Remove nested exclusive
Mar 22, 2024
3771339
Review comments
Mar 22, 2024
f8b9b3c
Lint fix
Mar 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,8 @@ public enum TransformFunctionType {
ReturnTypes.cascade(opBinding -> positionalReturnTypeInferenceFromStringLiteral(opBinding, 2,
SqlTypeName.VARCHAR), SqlTypeTransforms.FORCE_NULLABLE),
OperandTypes.family(ImmutableList.of(SqlTypeFamily.ANY, SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER,
SqlTypeFamily.CHARACTER), ordinal -> ordinal > 2), "json_extract_index"),
SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER), ordinal -> ordinal > 2), "json_extract_index"),

JSON_EXTRACT_KEY("jsonExtractKey", ReturnTypes.TO_ARRAY,
OperandTypes.family(ImmutableList.of(SqlTypeFamily.ANY, SqlTypeFamily.CHARACTER)), "json_extract_key"),

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,11 @@ public String getName() {

@Override
public void init(List<TransformFunction> arguments, Map<String, ColumnContext> columnContextMap) {
// Check that there are exactly 3 or 4 arguments
if (arguments.size() < 3 || arguments.size() > 4) {
// Check that there are exactly 3 or 4 or 5 arguments
if (arguments.size() < 3 || arguments.size() > 5) {
throw new IllegalArgumentException(
"Expected 3/4 arguments for transform function: jsonExtractIndex(jsonFieldName, 'jsonPath', 'resultsType',"
+ " ['defaultValue'])");
"Expected 3/4/5 arguments for transform function: jsonExtractIndex(jsonFieldName, 'jsonPath', 'resultsType',"
+ " ['defaultValue'], ['jsonFilterExpression'])");
}

TransformFunction firstArgument = arguments.get(0);
Expand All @@ -76,37 +76,45 @@ public void init(List<TransformFunction> arguments, Map<String, ColumnContext> c
if (!(secondArgument instanceof LiteralTransformFunction)) {
throw new IllegalArgumentException("JSON path argument must be a literal");
}
String inputJsonPath = ((LiteralTransformFunction) secondArgument).getStringLiteral();
_jsonPathString = ((LiteralTransformFunction) secondArgument).getStringLiteral();
try {
JsonPathCache.INSTANCE.getOrCompute(inputJsonPath);
JsonPathCache.INSTANCE.getOrCompute(_jsonPathString);
} catch (Exception e) {
throw new IllegalArgumentException("JSON path argument is not a valid JSON path");
}
_jsonPathString = inputJsonPath.substring(1); // remove $ prefix

TransformFunction thirdArgument = arguments.get(2);
if (!(thirdArgument instanceof LiteralTransformFunction)) {
throw new IllegalArgumentException("Result type argument must be a literal");
}
String resultsType = ((LiteralTransformFunction) thirdArgument).getStringLiteral().toUpperCase();
boolean isSingleValue = !resultsType.endsWith("_ARRAY");
if (isSingleValue && inputJsonPath.contains("[*]")) {
if (isSingleValue && _jsonPathString.contains("[*]")) {
throw new IllegalArgumentException(
"[*] syntax in json path is unsupported for singleValue field json_extract_index");
}
DataType dataType = isSingleValue ? DataType.valueOf(resultsType)
: DataType.valueOf(resultsType.substring(0, resultsType.length() - 6));

if (arguments.size() == 4) {
if (arguments.size() >= 4) {
TransformFunction fourthArgument = arguments.get(3);
if (!(fourthArgument instanceof LiteralTransformFunction)) {
throw new IllegalArgumentException("Default value must be a literal");
}
_defaultValue = dataType.convert(((LiteralTransformFunction) fourthArgument).getStringLiteral());
}

String filterJsonPath = null;
if (arguments.size() == 5) {
TransformFunction fifthArgument = arguments.get(4);
if (!(fifthArgument instanceof LiteralTransformFunction)) {
throw new IllegalArgumentException("JSON path filter argument must be a literal");
}
filterJsonPath = ((LiteralTransformFunction) fifthArgument).getStringLiteral();
}

_resultMetadata = new TransformResultMetadata(dataType, isSingleValue, false);
_valueToMatchingDocsMap = _jsonIndexReader.getMatchingFlattenedDocsMap(_jsonPathString);
_valueToMatchingDocsMap = _jsonIndexReader.getMatchingFlattenedDocsMap(_jsonPathString, filterJsonPath);
if (isSingleValue) {
// For single value result type, it's more efficient to use original docIDs map
_jsonIndexReader.convertFlattenedDocIdsToDocIds(_valueToMatchingDocsMap);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,23 @@ private void addMvTests(List<Object[]> testArguments) {
String.format("jsonExtractIndex(%s,'%s','STRING_ARRAY')", JSON_STRING_SV_COLUMN,
"$.arrayField[*].arrStringField"), "$.arrayField[*].arrStringField", DataType.STRING, false
});

// MV with filters
testArguments.add(new Object[]{
String.format(
"jsonExtractIndex(%s,'%s','INT_ARRAY', '0', 'REGEXP_LIKE(\"$.arrayField[*].arrStringField\", ''.*y.*'')')",
JSON_STRING_SV_COLUMN,
"$.arrayField[*].arrIntField"), "$.arrayField[?(@.arrStringField =~ /.*y.*/)].arrIntField", DataType.INT,
false
});

testArguments.add(new Object[]{
String.format(
"jsonExtractIndex(%s,'%s','STRING_ARRAY', '0', '\"$.arrayField[*].arrIntField\" > 2')",
JSON_STRING_SV_COLUMN,
"$.arrayField[*].arrStringField"), "$.arrayField[?(@.arrIntField > 2)].arrStringField", DataType.STRING,
false
});
}

@Test(dataProvider = "testJsonExtractIndexDefaultValue")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import java.util.TreeMap;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.pinot.common.request.context.ExpressionContext;
import org.apache.pinot.common.request.context.FilterContext;
Expand Down Expand Up @@ -367,10 +368,32 @@ public void convertFlattenedDocIdsToDocIds(Map<String, RoaringBitmap> valueToFla
}

@Override
public Map<String, RoaringBitmap> getMatchingFlattenedDocsMap(String jsonPathKey) {
public Map<String, RoaringBitmap> getMatchingFlattenedDocsMap(String jsonPathKey, @Nullable String filterString) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not introduced in this PR, but I think we should support JSON path keys both with and without the `$` prefix. See getMatchingFlattenedDocIds() for reference.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added support for this.

Map<String, RoaringBitmap> valueToMatchingFlattenedDocIdsMap = new HashMap<>();
_readLock.lock();
try {
RoaringBitmap filteredFlattenedDocIds = null;
FilterContext filter;
if (filterString != null) {
filter = RequestContextUtils.getFilter(CalciteSqlParser.compileToExpression(filterString));
Preconditions.checkArgument(!filter.isConstant(), "Invalid json match filter: " + filterString);
if (filter.getType() == FilterContext.Type.PREDICATE && isExclusive(filter.getPredicate().getType())) {
// Handle exclusive predicate separately because the flip can only be applied to the
// unflattened doc ids in order to get the correct result, and it cannot be nested
filteredFlattenedDocIds = getMatchingFlattenedDocIds(filter.getPredicate());
filteredFlattenedDocIds.flip(0, (long) _nextFlattenedDocId);
} else {
filteredFlattenedDocIds = getMatchingFlattenedDocIds(filter);
}
}
// Support 2 formats:
// - JSONPath format (e.g. "$.a[1].b"='abc', "$[0]"=1, "$"='abc')
// - Legacy format (e.g. "a[1].b"='abc')
if (jsonPathKey.startsWith("$")) {
jsonPathKey = jsonPathKey.substring(1);
} else {
jsonPathKey = JsonUtils.KEY_SEPARATOR + jsonPathKey;
}
Pair<String, RoaringBitmap> result = getKeyAndFlattenedDocIds(jsonPathKey);
jsonPathKey = result.getLeft();
RoaringBitmap arrayIndexFlattenDocIds = result.getRight();
Expand All @@ -380,6 +403,9 @@ public Map<String, RoaringBitmap> getMatchingFlattenedDocsMap(String jsonPathKey
Map<String, RoaringBitmap> subMap = getMatchingKeysMap(jsonPathKey);
for (Map.Entry<String, RoaringBitmap> entry : subMap.entrySet()) {
RoaringBitmap flattenedDocIds = entry.getValue().clone();
if (filteredFlattenedDocIds != null) {
flattenedDocIds.and(filteredFlattenedDocIds);
}
if (arrayIndexFlattenDocIds != null) {
flattenedDocIds.and(arrayIndexFlattenDocIds);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import java.util.Map;
import java.util.PriorityQueue;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.pinot.common.request.context.ExpressionContext;
import org.apache.pinot.common.request.context.FilterContext;
Expand Down Expand Up @@ -134,7 +135,8 @@ private MutableRoaringBitmap getMatchingFlattenedDocIds(FilterContext filter) {
case AND: {
List<FilterContext> children = filter.getChildren();
int numChildren = children.size();
MutableRoaringBitmap matchingDocIds = getMatchingFlattenedDocIds(children.get(0));
MutableRoaringBitmap matchingDocIds =
getMatchingFlattenedDocIds(children.get(0));
for (int i = 1; i < numChildren; i++) {
matchingDocIds.and(getMatchingFlattenedDocIds(children.get(i)));
}
Expand All @@ -143,7 +145,8 @@ private MutableRoaringBitmap getMatchingFlattenedDocIds(FilterContext filter) {
case OR: {
List<FilterContext> children = filter.getChildren();
int numChildren = children.size();
MutableRoaringBitmap matchingDocIds = getMatchingFlattenedDocIds(children.get(0));
MutableRoaringBitmap matchingDocIds =
getMatchingFlattenedDocIds(children.get(0));
for (int i = 1; i < numChildren; i++) {
matchingDocIds.or(getMatchingFlattenedDocIds(children.get(i)));
}
Expand Down Expand Up @@ -331,7 +334,40 @@ public void convertFlattenedDocIdsToDocIds(Map<String, RoaringBitmap> valueToFla
}

@Override
public Map<String, RoaringBitmap> getMatchingFlattenedDocsMap(String jsonPathKey) {
public Map<String, RoaringBitmap> getMatchingFlattenedDocsMap(String jsonPathKey, @Nullable String filterString) {
RoaringBitmap filteredFlattenedDocIds = null;
if (filterString != null) {
FilterContext filter;
try {
filter = RequestContextUtils.getFilter(CalciteSqlParser.compileToExpression(filterString));
Preconditions.checkArgument(!filter.isConstant());
} catch (Exception e) {
throw new BadQueryRequestException("Invalid json match filter: " + filterString);
}
if (filter.getType() == FilterContext.Type.PREDICATE && isExclusive(filter.getPredicate().getType())) {
// Handle exclusive predicate separately because the flip can only be applied to the
// unflattened doc ids in order to get the correct result, and it cannot be nested
filteredFlattenedDocIds = getMatchingFlattenedDocIds(filter.getPredicate()).toRoaringBitmap();
filteredFlattenedDocIds.flip(0, _numFlattenedDocs);
} else {
filteredFlattenedDocIds = getMatchingFlattenedDocIds(filter).toRoaringBitmap();
}
}
// Support 2 formats:
// - JSONPath format (e.g. "$.a[1].b"='abc', "$[0]"=1, "$"='abc')
// - Legacy format (e.g. "a[1].b"='abc')
if (_version == BaseJsonIndexCreator.VERSION_2) {
if (jsonPathKey.startsWith("$")) {
jsonPathKey = jsonPathKey.substring(1);
} else {
jsonPathKey = JsonUtils.KEY_SEPARATOR + jsonPathKey;
}
} else {
// For V1 backward-compatibility
if (jsonPathKey.startsWith("$.")) {
jsonPathKey = jsonPathKey.substring(2);
}
}
Map<String, RoaringBitmap> result = new HashMap<>();
Pair<String, MutableRoaringBitmap> pathKey = getKeyAndFlattenedDocIds(jsonPathKey);
if (pathKey.getRight() != null && pathKey.getRight().isEmpty()) {
Expand All @@ -347,6 +383,10 @@ public Map<String, RoaringBitmap> getMatchingFlattenedDocsMap(String jsonPathKey
for (int dictId = dictIds[0]; dictId < dictIds[1]; dictId++) {
String key = _dictionary.getStringValue(dictId);
RoaringBitmap docIds = _invertedIndex.getDocIds(dictId).toRoaringBitmap();
if (filteredFlattenedDocIds != null) {
docIds.and(filteredFlattenedDocIds);
}

if (arrayIndexFlattenDocIds != null) {
docIds.and(arrayIndexFlattenDocIds);
}
Expand Down
Loading