Skip to content

Commit

Permalink
opensearch-project#639: allow metadata fields (_id) to be used as Qua…
Browse files Browse the repository at this point in the history
…lifiedName

Signed-off-by: Andrew Carbonetto <andrewc@bitquilltech.com>
  • Loading branch information
acarbonetto committed Oct 25, 2022
1 parent bfad32c commit 0cba405
Show file tree
Hide file tree
Showing 8 changed files with 80 additions and 14 deletions.
7 changes: 7 additions & 0 deletions core/src/main/java/org/opensearch/sql/analysis/Analyzer.java
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,13 @@ public LogicalPlan visitRelation(Relation node, AnalysisContext context) {
}
table.getFieldTypes().forEach((k, v) -> curEnv.define(new Symbol(Namespace.FIELD_NAME, k), v));

// add OpenSearch metadata types
curEnv.define(new Symbol(Namespace.FIELD_NAME, "_index"), ExprCoreType.STRING);
curEnv.define(new Symbol(Namespace.FIELD_NAME, "_id"), ExprCoreType.STRING);
curEnv.define(new Symbol(Namespace.FIELD_NAME, "_score"), ExprCoreType.FLOAT);
curEnv.define(new Symbol(Namespace.FIELD_NAME, "_maxscore"), ExprCoreType.FLOAT);
curEnv.define(new Symbol(Namespace.FIELD_NAME, "_sort"), ExprCoreType.LONG);

// Put index name or its alias in index namespace on type environment so qualifier
// can be removed when analyzing qualified name. The value (expr type) here doesn't matter.
curEnv.define(new Symbol(Namespace.INDEX_NAME,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,27 @@
public class QualifiedName extends UnresolvedExpression {
private final List<String> parts;

@Getter
private final Boolean isMetadataField;

public QualifiedName(String name) {
this(name, Boolean.FALSE);
}

public QualifiedName(String name, Boolean isMetadataField) {
this.parts = Collections.singletonList(name);
this.isMetadataField = isMetadataField;
}

public QualifiedName(Iterable<String> parts) {
this(parts, Boolean.FALSE);
}

/**
* QualifiedName Constructor.
*/
public QualifiedName(Iterable<String> parts) {
public QualifiedName(Iterable<String> parts, Boolean isMetadataField) {
this.isMetadataField = isMetadataField;
List<String> partsList = StreamSupport.stream(parts.spliterator(), false).collect(toList());
if (partsList.isEmpty()) {
throw new IllegalArgumentException("parts is empty");
Expand Down Expand Up @@ -110,4 +123,6 @@ public List<UnresolvedExpression> getChild() {
public <R, C> R accept(AbstractNodeVisitor<R, C> nodeVisitor, C context) {
return nodeVisitor.visitQualifiedName(this, context);
}

public Boolean isMetadataField() { return Boolean.TRUE; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ public void specificPercentilesIntAndDouble() throws IOException {
}
}

// TODO: this should now work
@Ignore("only work for legacy engine")
public void nestedObjectsAndArraysAreQuoted() throws IOException {
final String query = String.format(Locale.ROOT, "SELECT * FROM %s WHERE _id = 5",
Expand All @@ -114,6 +115,7 @@ public void nestedObjectsAndArraysAreQuoted() throws IOException {
Assert.assertThat(result, containsString(expectedMessage));
}

// TODO: this should now work
@Ignore("only work for legacy engine")
public void arraysAreQuotedInFlatMode() throws IOException {
setFlatOption(true);
Expand Down Expand Up @@ -546,6 +548,7 @@ public void twoCharsSeperator() throws Exception {

}

// TODO: this should now work
@Ignore("only work for legacy engine")
public void includeIdAndNotTypeOrScore() throws Exception {
String query = String.format(Locale.ROOT,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,11 @@ public Map<String, ExprType> getFieldTypes() {
.filter(entry -> !ExprCoreType.UNKNOWN.equals(entry.getValue()))
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)));
}
fieldTypes.put("_index", ExprCoreType.STRING);
fieldTypes.put("_id", ExprCoreType.STRING);
fieldTypes.put("_score", ExprCoreType.FLOAT);
fieldTypes.put("_maxscore", ExprCoreType.FLOAT);
fieldTypes.put("_sort", ExprCoreType.LONG);
return fieldTypes;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
import org.opensearch.search.SearchHits;
import org.opensearch.search.aggregations.Aggregations;
import org.opensearch.sql.data.model.ExprTupleValue;
import org.opensearch.sql.data.model.ExprStringValue;
import org.opensearch.sql.data.model.ExprFloatValue;
import org.opensearch.sql.data.model.ExprLongValue;
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.model.ExprValueUtils;
import org.opensearch.sql.opensearch.data.value.OpenSearchExprValueFactory;
Expand Down Expand Up @@ -92,23 +95,34 @@ public Iterator<ExprValue> iterator() {
return (ExprValue) ExprTupleValue.fromExprValueMap(builder.build());
}).iterator();
} else {
float maxScore = hits.getMaxScore();
return Arrays.stream(hits.getHits())
.map(hit -> {
ExprValue docData = exprValueFactory.construct(hit.getSourceAsString());
if (hit.getHighlightFields().isEmpty()) {
return docData;
} else {
ImmutableMap.Builder<String, ExprValue> builder = new ImmutableMap.Builder<>();
builder.putAll(docData.tupleValue());
String source = hit.getSourceAsString();
ExprValue docData = exprValueFactory.construct(source);

ImmutableMap.Builder<String, ExprValue> builder = new ImmutableMap.Builder<>();
builder.putAll(docData.tupleValue());
builder.put("_index", new ExprStringValue(hit.getIndex()));
builder.put("_id", new ExprStringValue(hit.getId()));
if (!Float.isNaN(hit.getScore())) {
builder.put("_score", new ExprFloatValue(hit.getScore()));
}
if (!Float.isNaN(maxScore)) {
builder.put("_maxscore", new ExprLongValue(maxScore));
}
builder.put("_sort", new ExprLongValue(hit.getSeqNo()));

if (!hit.getHighlightFields().isEmpty()) {
var hlBuilder = ImmutableMap.<String, ExprValue>builder();
for (var es : hit.getHighlightFields().entrySet()) {
hlBuilder.put(es.getKey(), ExprValueUtils.collectionValue(
Arrays.stream(es.getValue().fragments()).map(
t -> (t.toString())).collect(Collectors.toList())));
}
builder.put("_highlight", ExprTupleValue.fromExprValueMap(hlBuilder.build()));
return ExprTupleValue.fromExprValueMap(builder.build());
}
return (ExprValue) ExprTupleValue.fromExprValueMap(builder.build());
}).iterator();
}
}
Expand Down
9 changes: 9 additions & 0 deletions sql/src/main/antlr/OpenSearchSQLIdentifierParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,18 @@ qualifiedName
ident
: DOT? ID
| BACKTICK_QUOTE_ID
| metadataField
| keywordsCanBeId
;

metadataField
: META_INDEX
| META_ID
| META_SCORE
| META_MAXSCORE
| META_SORT
;

keywordsCanBeId
: FULL
| FIELD | D | T | TS // OD SQL and ODBC special
Expand Down
14 changes: 13 additions & 1 deletion sql/src/main/antlr/OpenSearchSQLLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,15 @@ SUBSTRING: 'SUBSTRING';
TRIM: 'TRIM';


// Metadata fields can be ID

META_INDEX: '_ID';
META_ID: '_ID';
META_SCORE: '_SCORE';
META_MAXSCORE: '_MAXSCORE';
META_SORT: '_SORT';


// Keywords, but can be ID
// Common Keywords, but can be ID

Expand Down Expand Up @@ -441,14 +450,17 @@ BACKTICK_QUOTE_ID: BQUOTA_STRING;

// Fragments for Literal primitives
fragment EXPONENT_NUM_PART: 'E' [-+]? DEC_DIGIT+;
fragment ID_LITERAL: [@*A-Z]+?[*A-Z_\-0-9]*;
fragment DQUOTA_STRING: '"' ( '\\'. | '""' | ~('"'| '\\') )* '"';
fragment SQUOTA_STRING: '\'' ('\\'. | '\'\'' | ~('\'' | '\\'))* '\'';
fragment BQUOTA_STRING: '`' ( '\\'. | '``' | ~('`'|'\\'))* '`';
fragment HEX_DIGIT: [0-9A-F];
fragment DEC_DIGIT: [0-9];
fragment BIT_STRING_L: 'B' '\'' [01]+ '\'';

// Identifiers cannot start with a single '_' since this an OpebSearch reserved
// metadata field. Two underscores (or more) is acceptable, such as '__field'.
fragment ID_LITERAL: ([_][_]|[@*A-Z])+?[*A-Z_\-0-9]*;

// Last tokens must generate Errors

ERROR_RECOGNITION: . -> channel(ERRORCHANNEL);
Original file line number Diff line number Diff line change
Expand Up @@ -430,12 +430,13 @@ private UnresolvedExpression visitConstantFunction(String functionName,
}

private QualifiedName visitIdentifiers(List<IdentContext> identifiers) {
Boolean isMetadataField = identifiers.stream().filter(id -> id.metadataField() != null).findFirst().isPresent();
return new QualifiedName(
identifiers.stream()
.map(RuleContext::getText)
.map(StringUtils::unquoteIdentifier)
.collect(Collectors.toList())
);
identifiers.stream()
.map(RuleContext::getText)
.map(StringUtils::unquoteIdentifier)
.collect(Collectors.toList()),
isMetadataField);
}

private List<UnresolvedExpression> singleFieldRelevanceArguments(
Expand Down

0 comments on commit 0cba405

Please sign in to comment.