Skip to content

Commit 1dd6752

Browse files
committed
Consolidate with partial pushdown logic after merge
Signed-off-by: Songkan Tang <songkant@amazon.com>
1 parent d2a95ea commit 1dd6752

File tree

4 files changed

+81
-37
lines changed

4 files changed

+81
-37
lines changed

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,20 @@ public void supportPartialPushDown_NoPushIfAllFailed() throws IOException {
9595
assertJsonEqualsIgnoreId(expected, result);
9696
}
9797

98+
@Test
99+
public void supportPartialPushDownScript() throws IOException {
100+
Assume.assumeTrue("This test is only for push down enabled", isPushdownEnabled());
101+
// field `address` is text type without keyword subfield, so we cannot push it down.
102+
// But the second condition can be translated to script, so the second one is pushed down.
103+
String query =
104+
"source=opensearch-sql_test_index_account | where address = '671 Bristol Street' and age -"
105+
+ " 2 = 30 | fields firstname, age, address";
106+
var result = explainQueryToString(query);
107+
String expected =
108+
loadFromFile("expectedOutput/calcite/explain_partial_filter_script_push.json");
109+
assertJsonEqualsIgnoreId(expected, result);
110+
}
111+
98112
// Only for Calcite, as v2 gets unstable serialized string for function
99113
@Test
100114
public void testFilterScriptPushDownExplain() throws Exception {
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"calcite": {
3+
"logical": "LogicalProject(firstname=[$1], age=[$8], address=[$2])\n LogicalFilter(condition=[AND(=($2, '671 Bristol Street'), =(-($8, 2), 30))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
4+
"physical": "EnumerableCalc(expr#0..2=[{inputs}], expr#3=['671 Bristol Street':VARCHAR], expr#4=[=($t1, $t3)], firstname=[$t0], age=[$t2], address=[$t1], $condition=[$t4])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[firstname, address, age], SCRIPT->=(-($2, 2), 30)], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"bool\":{\"must\":[{\"script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQBVnsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJmaXJzdG5hbWUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJhZGRyZXNzIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0AnJ7CiAgIm9wIjogewogICAgIm5hbWUiOiAiPSIsCiAgICAia2luZCI6ICJFUVVBTFMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICItIiwKICAgICAgICAia2luZCI6ICJNSU5VUyIsCiAgICAgICAgInN5bnRheCI6ICJCSU5BUlkiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiaW5wdXQiOiAyLAogICAgICAgICAgIm5hbWUiOiAiJDIiCiAgICAgICAgfSwKICAgICAgICB7CiAgICAgICAgICAibGl0ZXJhbCI6IDIsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICB9CiAgICAgICAgfQogICAgICBdLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImxpdGVyYWwiOiAzMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UKICAgICAgfQogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAA3QACWZpcnN0bmFtZXNyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaFRleHRUeXBlrYOjkwTjMUQCAAFMAAZmaWVsZHN0AA9MamF2YS91dGlsL01hcDt4cgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZcJjvMoC+gU1AgADTAAMZXhwckNvcmVUeXBldAArTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL3R5cGUvRXhwckNvcmVUeXBlO0wAC21hcHBpbmdUeXBldABITG9yZy9vcGVuc2VhcmNoL3NxbC9vcGVuc2VhcmNoL2RhdGEvdHlwZS9PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGU7TAAKcHJvcGVydGllc3EAfgALeHB+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAdVTktOT1dOfnIARm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGUAAAAAAAAAABIAAHhxAH4AEXQABFRleHRzcgA8c2hhZGVkLmNvbS5nb29nbGUuY29tbW9uLmNvbGxlY3QuSW1tdXRhYmxlTWFwJFNlcmlhbGl6ZWRGb3JtAAAAAAAAAAACAAJMAARrZXlzdAASTGphdmEvbGFuZy9PYmplY3Q7TAAGdmFsdWVzcQB+ABh4cHVyABNbTGphdmEubGFuZy5PYmplY3Q7kM5YnxBzKWwCAAB4cAAAAAB1cQB+ABoAAAAAc3EAfgAAAAAAA3cEAAAAAnQAB2tleXdvcmRzcQB+AAx+cQB+ABB0AAZTVFJJTkd+cQB+ABR0AAdLZXl3b3JkcQB+ABl4dAAHYWRkcmVzc3NxAH4ACnEAfgAScQB+ABVxAH4AGXNxAH4AAAAAAAN3BAAAAAB4dAADYWdlfnEAfgAQdAAETE9OR3h4\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"utcTimestamp\":*}},\"boost\":1.0}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"firstname\",\"address\",\"age\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n"
5+
}
6+
}

opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java

Lines changed: 55 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -177,16 +177,20 @@ public static QueryExpression analyzeExpression(
177177
List<String> schema,
178178
Map<String, ExprType> fieldTypes,
179179
RelDataType rowType,
180-
RelOptCluster cluster) throws ExpressionNotAnalyzableException {
180+
RelOptCluster cluster)
181+
throws ExpressionNotAnalyzableException {
181182
requireNonNull(expression, "expression");
182183
try {
183184
// visits expression tree
184185
QueryExpression queryExpression =
185186
(QueryExpression) expression.accept(new Visitor(schema, fieldTypes, rowType, cluster));
186187
return queryExpression;
187188
} catch (Throwable e) {
188-
Throwables.throwIfInstanceOf(e, UnsupportedOperationException.class);
189-
throw new ExpressionNotAnalyzableException("Can't convert " + expression, e);
189+
if (e instanceof UnsupportedScriptException) {
190+
Throwables.throwIfInstanceOf(e, UnsupportedOperationException.class);
191+
throw new ExpressionNotAnalyzableException("Can't convert " + expression, e);
192+
}
193+
return new ScriptQueryExpression(expression, rowType, fieldTypes, cluster);
190194
}
191195
}
192196

@@ -604,9 +608,6 @@ private static QueryExpression constructQueryExpressionForSearch(
604608
}
605609

606610
private QueryExpression andOr(RexCall call) {
607-
QueryExpression[] expressions = new QueryExpression[call.getOperands().size()];
608-
boolean partial = false;
609-
int failedCount = 0;
610611
// For function isEmpty and isBlank, we implement them via expression `isNull or {@function}`,
611612
// Unlike `OR` in Java, `SHOULD` in DSL will evaluate both branches and lead to NPE.
612613
if (call.getKind() == SqlKind.OR
@@ -616,40 +617,41 @@ private QueryExpression andOr(RexCall call) {
616617
throw new UnsupportedScriptException(
617618
"DSL will evaluate both branches of OR with isNUll, prevent push-down to avoid NPE");
618619
}
620+
621+
QueryExpression[] expressions = new QueryExpression[call.getOperands().size()];
622+
PredicateAnalyzerException firstError = null;
623+
boolean partial = false;
624+
int failedCount = 0;
619625
for (int i = 0; i < call.getOperands().size(); i++) {
626+
RexNode operand = call.getOperands().get(i);
620627
try {
621-
Expression expr = call.getOperands().get(i).accept(this);
622-
if (expr instanceof NamedFieldExpression) {
623-
// nop currently
624-
} else {
625-
expressions[i] = (QueryExpression) call.getOperands().get(i).accept(this);
626-
// Update or simplify the analyzed node list if it is not partial.
627-
if (!expressions[i].isPartial())
628-
expressions[i].updateAnalyzedNodes(call.getOperands().get(i));
628+
Expression expr = tryAnalyzeOperand(operand);
629+
if (expr instanceof QueryExpression) {
630+
expressions[i] = (QueryExpression) expr;
631+
partial |= expressions[i].isPartial();
629632
}
630-
partial |= expressions[i].isPartial();
631633
} catch (PredicateAnalyzerException e) {
632-
try {
633-
expressions[i] =
634-
new ScriptQueryExpression(call.getOperands().get(i), rowType, fieldTypes, cluster);
635-
if (!expressions[i].isPartial())
636-
expressions[i].updateAnalyzedNodes(call.getOperands().get(i));
637-
} catch (UnsupportedScriptException ex) {
638-
if (call.getKind() == SqlKind.OR) throw ex;
639-
partial = true;
634+
if (firstError == null) {
635+
firstError = e;
640636
}
641-
} catch (UnsupportedScriptException e) {
642-
if (call.getKind() == SqlKind.OR) throw e;
643637
partial = true;
644638
++failedCount;
645639
// If we cannot analyze the operand, wrap the RexNode with UnAnalyzableQueryExpression and
646640
// record them in the array. We will reuse them later.
647-
expressions[i] = new UnAnalyzableQueryExpression(call.getOperands().get(i));
641+
expressions[i] = new UnAnalyzableQueryExpression(operand);
648642
}
649643
}
650644

651645
switch (call.getKind()) {
652646
case OR:
647+
if (partial) {
648+
if (firstError != null) {
649+
throw firstError;
650+
} else {
651+
final String message = format(Locale.ROOT, "Unable to handle call: [%s]", call);
652+
throw new PredicateAnalyzerException(message);
653+
}
654+
}
653655
return CompoundQueryExpression.or(expressions);
654656
case AND:
655657
if (failedCount == call.getOperands().size()) {
@@ -664,6 +666,30 @@ private QueryExpression andOr(RexCall call) {
664666
}
665667
}
666668

669+
private Expression tryAnalyzeOperand(RexNode node) {
670+
try {
671+
Expression expr = node.accept(this);
672+
if (expr instanceof NamedFieldExpression) {
673+
return expr;
674+
}
675+
QueryExpression qe = (QueryExpression) expr;
676+
if (!qe.isPartial()) {
677+
qe.updateAnalyzedNodes(node);
678+
}
679+
return qe;
680+
} catch (PredicateAnalyzerException firstFailed) {
681+
try {
682+
QueryExpression qe = new ScriptQueryExpression(node, rowType, fieldTypes, cluster);
683+
if (!qe.isPartial()) {
684+
qe.updateAnalyzedNodes(node);
685+
}
686+
return qe;
687+
} catch (UnsupportedScriptException secondFailed) {
688+
throw new PredicateAnalyzerException(secondFailed);
689+
}
690+
}
691+
}
692+
667693
/**
668694
* Holder class for a pair of expressions. Used to convert {@code 1 = foo} into {@code foo = 1}
669695
*/
@@ -1297,6 +1323,7 @@ private static String timestampValueForPushDown(String value) {
12971323

12981324
public static class ScriptQueryExpression extends QueryExpression {
12991325
private final String code;
1326+
private RexNode analyzedNode;
13001327

13011328
public ScriptQueryExpression(
13021329
RexNode rexNode,
@@ -1331,12 +1358,12 @@ public QueryBuilder builder() {
13311358

13321359
@Override
13331360
public List<RexNode> getAnalyzedNodes() {
1334-
return List.of();
1361+
return List.of(analyzedNode);
13351362
}
13361363

13371364
@Override
13381365
public void updateAnalyzedNodes(RexNode rexNode) {
1339-
1366+
this.analyzedNode = rexNode;
13401367
}
13411368

13421369
@Override

opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@
4747
import org.opensearch.sql.opensearch.data.type.OpenSearchDataType;
4848
import org.opensearch.sql.opensearch.data.type.OpenSearchDataType.MappingType;
4949
import org.opensearch.sql.opensearch.request.PredicateAnalyzer.ExpressionNotAnalyzableException;
50-
import org.opensearch.sql.opensearch.storage.script.CalciteScriptEngine.UnsupportedScriptException;
5150

5251
public class PredicateAnalyzerTest {
5352
final RelDataTypeFactory typeFactory = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT);
@@ -660,11 +659,11 @@ void equals_throwException_TextWithoutKeyword() {
660659
final RexInputRef field3 =
661660
builder.makeInputRef(typeFactory.createSqlType(SqlTypeName.VARCHAR), 2);
662661
RexNode call = builder.makeCall(SqlStdOperatorTable.EQUALS, field3, stringLiteral);
663-
IllegalArgumentException exception =
662+
ExpressionNotAnalyzableException exception =
664663
assertThrows(
665-
IllegalArgumentException.class,
664+
ExpressionNotAnalyzableException.class,
666665
() -> PredicateAnalyzer.analyze(call, schema, fieldTypes));
667-
assertEquals("field name is null or empty", exception.getMessage());
666+
assertEquals("Can't convert =($2, 'Hi')", exception.getMessage());
668667
}
669668

670669
@Test
@@ -679,12 +678,10 @@ void isNullOr_throwException() {
679678
.build();
680679
// PPL IS_EMPTY is translated to OR(IS_NULL(arg), IS_EMPTY(arg))
681680
RexNode call = PPLFuncImpTable.INSTANCE.resolve(builder, BuiltinFunctionName.IS_EMPTY, field2);
682-
UnsupportedScriptException exception =
681+
ExpressionNotAnalyzableException exception =
683682
assertThrows(
684-
UnsupportedScriptException.class,
683+
ExpressionNotAnalyzableException.class,
685684
() -> PredicateAnalyzer.analyzeExpression(call, schema, fieldTypes, rowType, cluster));
686-
assertEquals(
687-
"DSL will evaluate both branches of OR with isNUll, prevent push-down to avoid NPE",
688-
exception.getMessage());
685+
assertEquals("Can't convert OR(IS NULL($1), IS EMPTY($1))", exception.getMessage());
689686
}
690687
}

0 commit comments

Comments
 (0)