-
Couldn't load subscription status.
- Fork 176
Filter script pushdown with RelJson serialization in Calcite #3859
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
1894875
13acaf5
b37dc92
1abf8aa
bc6ecac
cb3a2fe
700fc29
fc77e5e
aba4fed
2cf247d
3049c0c
85f8df0
18ab9bf
5b18cc4
e522bc0
1da69d1
ee67876
c18006a
ffd26a7
3ff2f50
2fee6ee
08cd417
fde5cf0
d2a95ea
1dd6752
573552b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,80 @@ | ||
| /* | ||
| * Copyright OpenSearch Contributors | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
| package org.opensearch.sql.expression.operator.predicate; | ||
|
|
||
| import java.util.Map; | ||
| import java.util.concurrent.TimeUnit; | ||
| import org.apache.calcite.plan.RelOptCluster; | ||
| import org.apache.calcite.plan.volcano.VolcanoPlanner; | ||
| import org.apache.calcite.rel.type.RelDataType; | ||
| import org.apache.calcite.rel.type.StructKind; | ||
| import org.apache.calcite.rex.RexBuilder; | ||
| import org.apache.calcite.rex.RexNode; | ||
| import org.apache.calcite.sql.fun.SqlStdOperatorTable; | ||
| import org.apache.calcite.sql.type.SqlTypeName; | ||
| import org.openjdk.jmh.annotations.Benchmark; | ||
| import org.openjdk.jmh.annotations.BenchmarkMode; | ||
| import org.openjdk.jmh.annotations.Fork; | ||
| import org.openjdk.jmh.annotations.Measurement; | ||
| import org.openjdk.jmh.annotations.Mode; | ||
| import org.openjdk.jmh.annotations.OutputTimeUnit; | ||
| import org.openjdk.jmh.annotations.Scope; | ||
| import org.openjdk.jmh.annotations.State; | ||
| import org.openjdk.jmh.annotations.Warmup; | ||
| import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory; | ||
| import org.opensearch.sql.data.type.ExprCoreType; | ||
| import org.opensearch.sql.data.type.ExprType; | ||
| import org.opensearch.sql.expression.DSL; | ||
| import org.opensearch.sql.expression.Expression; | ||
| import org.opensearch.sql.expression.function.BuiltinFunctionName; | ||
| import org.opensearch.sql.expression.function.PPLFuncImpTable; | ||
| import org.opensearch.sql.opensearch.storage.serde.DefaultExpressionSerializer; | ||
| import org.opensearch.sql.opensearch.storage.serde.RelJsonSerializer; | ||
|
|
||
| @Warmup(iterations = 1) | ||
| @Measurement(iterations = 10) | ||
| @BenchmarkMode(Mode.AverageTime) | ||
| @OutputTimeUnit(TimeUnit.NANOSECONDS) | ||
| @State(Scope.Thread) | ||
| @Fork(value = 1) | ||
| public class ExpressionScriptSerdeBenchmark { | ||
|
|
||
| @Benchmark | ||
| public void testV2ExpressionSerde() { | ||
| DefaultExpressionSerializer defaultSerializer = new DefaultExpressionSerializer(); | ||
| Expression exprUpper = DSL.upper(DSL.ref("Referer", ExprCoreType.STRING)); | ||
| Expression exprNotEquals = DSL.notequal(exprUpper, DSL.literal("ABOUT")); | ||
|
|
||
| String serializedStr = defaultSerializer.serialize(exprNotEquals); | ||
| defaultSerializer.deserialize(serializedStr); | ||
| } | ||
|
|
||
| @Benchmark | ||
| public void testRexNodeJsonSerde() { | ||
| RexBuilder rexBuilder = new RexBuilder(OpenSearchTypeFactory.TYPE_FACTORY); | ||
| RelOptCluster cluster = RelOptCluster.create(new VolcanoPlanner(), rexBuilder); | ||
| RelJsonSerializer relJsonSerializer = new RelJsonSerializer(cluster); | ||
| RelDataType rowType = | ||
| rexBuilder | ||
| .getTypeFactory() | ||
| .builder() | ||
| .kind(StructKind.FULLY_QUALIFIED) | ||
| .add("Referer", rexBuilder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR)) | ||
| .build(); | ||
| RexNode rexUpper = | ||
| PPLFuncImpTable.INSTANCE.resolve( | ||
| rexBuilder, | ||
| BuiltinFunctionName.UPPER, | ||
| rexBuilder.makeInputRef(rowType.getFieldList().get(0).getType(), 0)); | ||
| RexNode rexNotEquals = | ||
| rexBuilder.makeCall( | ||
| SqlStdOperatorTable.NOT_EQUALS, rexUpper, rexBuilder.makeLiteral("ABOUT")); | ||
| Map<String, ExprType> fieldTypes = Map.of("Referer", ExprCoreType.STRING); | ||
|
|
||
| String serializedStr = relJsonSerializer.serialize(rexNotEquals, rowType, fieldTypes); | ||
| relJsonSerializer.deserialize(serializedStr); | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,6 +10,7 @@ | |
| import static org.opensearch.sql.util.MatcherUtils.assertJsonEqualsIgnoreId; | ||
|
|
||
| import java.io.IOException; | ||
| import org.junit.Ignore; | ||
| import org.junit.jupiter.api.Test; | ||
| import org.opensearch.client.ResponseException; | ||
| import org.opensearch.sql.legacy.TestUtils; | ||
|
|
@@ -434,6 +435,43 @@ public void testMultiFieldsRelevanceQueryFunctionExplain() throws IOException { | |
| + " default_operator='or', analyzer=english)")); | ||
| } | ||
|
|
||
| @Ignore("The serialized string is unstable because of function properties") | ||
| @Test | ||
| public void testFilterScriptPushDownExplain() throws Exception { | ||
| String expected = loadExpectedPlan("explain_filter_script_push.json"); | ||
| assertJsonEqualsIgnoreId( | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we avoid comparing all function properties? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's a member of FunctionExpression in V2. It holds a reference to the current Instant object, so the generated byte string is not stable. It can't be avoided for now. |
||
| expected, | ||
| explainQueryToString( | ||
| "source=opensearch-sql_test_index_account | where firstname ='Amber' and age - 2 = 30 |" | ||
| + " fields firstname, age")); | ||
| } | ||
|
|
||
| @Ignore("The serialized string is unstable because of function properties") | ||
| @Test | ||
| public void testFilterFunctionScriptPushDownExplain() throws Exception { | ||
| String expected = loadExpectedPlan("explain_filter_function_script_push.json"); | ||
| assertJsonEqualsIgnoreId( | ||
| expected, | ||
| explainQueryToString( | ||
| "source=opensearch-sql_test_index_account | where length(firstname) = 5 and abs(age) =" | ||
| + " 32 and balance = 39225 | fields firstname, age")); | ||
| } | ||
|
|
||
| @Test | ||
| public void testDifferentFilterScriptPushDownBehaviorExplain() throws Exception { | ||
| String explainedPlan = | ||
| explainQueryToString( | ||
| "source=opensearch-sql_test_index_account | where firstname != '' | fields firstname"); | ||
| if (isCalciteEnabled()) { | ||
| // Calcite pushdown as pure filter query | ||
| String expected = loadExpectedPlan("explain_filter_script_push_diff.json"); | ||
| assertJsonEqualsIgnoreId(expected, explainedPlan); | ||
| } else { | ||
| // V2 pushdown as script | ||
| assertTrue(explainedPlan.contains("{\\\"script\\\":")); | ||
| } | ||
| } | ||
|
|
||
| protected String loadExpectedPlan(String fileName) throws IOException { | ||
| String prefix; | ||
| if (isCalciteEnabled()) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -174,8 +174,9 @@ public void not_pushdown_throws_exception() throws IOException { | |
| String query1 = | ||
| "SOURCE=" | ||
| + TEST_INDEX_BEER | ||
| + " | EVAL answerId = AcceptedAnswerId + 1" | ||
| + " | WHERE simple_query_string(['Tags'], 'taste') and answerId > 200"; | ||
| + " | STATS count(AcceptedAnswerId) as count" | ||
| + " | EVAL dateStr = makedate(2025, count)" | ||
| + " | WHERE simple_query_string(['dateStr'], 'taste')"; | ||
|
Comment on lines +177 to +179
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Changed to a more complex PPL query because most EVAL filters can now be pushed down as script expressions. Adding an aggregation before the filters prevents them from being pushed down. |
||
| assertThrows(Exception.class, () -> executeQuery(query1)); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| { | ||
| "calcite": { | ||
| "logical": "LogicalProject(firstname=[$1], age=[$8])\n LogicalFilter(condition=[AND(=(CHAR_LENGTH($1), 5), =(ABS($8), 32), =($3, 39225))])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n", | ||
| "physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[firstname, balance, age], SCRIPT->AND(=(CHAR_LENGTH($0), 5), =(ABS($2), 32), =($1, 39225)), PROJECT->[firstname, age]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"bool\":{\"must\":[{\"script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQBPnsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJmaXJzdG5hbWUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJiYWxhbmNlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0Aa17CiAgIm9wIjogewogICAgIm5hbWUiOiAiPSIsCiAgICAia2luZCI6ICJFUVVBTFMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJDSEFSX0xFTkdUSCIsCiAgICAgICAgImtpbmQiOiAiQ0hBUl9MRU5HVEgiLAogICAgICAgICJzeW50YXgiOiAiRlVOQ1RJT04iCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiaW5wdXQiOiAwLAogICAgICAgICAgIm5hbWUiOiAiJDAiCiAgICAgICAgfQogICAgICBdCiAgICB9LAogICAgewogICAgICAibGl0ZXJhbCI6IDUsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgIH0KICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAN0AAlmaXJzdG5hbWVzcgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hUZXh0VHlwZa2Do5ME4zFEAgABTAAGZmllbGRzdAAPTGphdmEvdXRpbC9NYXA7eHIAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGXCY7zKAvoFNQIAA0wADGV4cHJDb3JlVHlwZXQAK0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS90eXBlL0V4cHJDb3JlVHlwZTtMAAttYXBwaW5nV
HlwZXQASExvcmcvb3BlbnNlYXJjaC9zcWwvb3BlbnNlYXJjaC9kYXRhL3R5cGUvT3BlblNlYXJjaERhdGFUeXBlJE1hcHBpbmdUeXBlO0wACnByb3BlcnRpZXNxAH4AC3hwfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAHVU5LTk9XTn5yAEZvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlJE1hcHBpbmdUeXBlAAAAAAAAAAASAAB4cQB+ABF0AARUZXh0c3IAPHNoYWRlZC5jb20uZ29vZ2xlLmNvbW1vbi5jb2xsZWN0LkltbXV0YWJsZU1hcCRTZXJpYWxpemVkRm9ybQAAAAAAAAAAAgACTAAEa2V5c3QAEkxqYXZhL2xhbmcvT2JqZWN0O0wABnZhbHVlc3EAfgAYeHB1cgATW0xqYXZhLmxhbmcuT2JqZWN0O5DOWJ8QcylsAgAAeHAAAAAAdXEAfgAaAAAAAHNxAH4AAAAAAAN3BAAAAAJ0AAdrZXl3b3Jkc3EAfgAMfnEAfgAQdAAGU1RSSU5HfnEAfgAUdAAHS2V5d29yZHEAfgAZeHQAB2JhbGFuY2V+cQB+ABB0AARMT05HdAADYWdlcQB+ACV4eA==\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"utcTimestamp\":*}},\"boost\":1.0}},{\"script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQBPnsKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJmaXJzdG5hbWUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJiYWxhbmNlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgIm5hbWUiOiAiYWdlIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogZmFsc2UKfXQABGV4cHJ0Aal7CiAgIm9wIjogewogICAgIm5hbWUiOiAiPSIsCiAgICAia2luZCI6ICJFUVVBTFMiLAogICAgInN5bnRheCI6ICJCSU5BUlkiCiAgfSwKICAib3BlcmFuZHMiOiBbCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJBQlMiLAogICAgICAgICJraW5kIjogIk9USEVSX0ZVTkNUSU9OIiwKICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImlucHV0IjogMiwKICAgICAgICAgICJuYW1lIjogIiQyIgogICAgICAgIH0KICAgICAgXQogICAgfSwKICAgIHsKICAgICAgImxpdGVyYWwiOiAzMiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgIm51bGx
hYmxlIjogZmFsc2UKICAgICAgfQogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAA3QACWZpcnN0bmFtZXNyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaFRleHRUeXBlrYOjkwTjMUQCAAFMAAZmaWVsZHN0AA9MamF2YS91dGlsL01hcDt4cgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZcJjvMoC+gU1AgADTAAMZXhwckNvcmVUeXBldAArTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL3R5cGUvRXhwckNvcmVUeXBlO0wAC21hcHBpbmdUeXBldABITG9yZy9vcGVuc2VhcmNoL3NxbC9vcGVuc2VhcmNoL2RhdGEvdHlwZS9PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGU7TAAKcHJvcGVydGllc3EAfgALeHB+cgApb3JnLm9wZW5zZWFyY2guc3FsLmRhdGEudHlwZS5FeHByQ29yZVR5cGUAAAAAAAAAABIAAHhyAA5qYXZhLmxhbmcuRW51bQAAAAAAAAAAEgAAeHB0AAdVTktOT1dOfnIARm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGUAAAAAAAAAABIAAHhxAH4AEXQABFRleHRzcgA8c2hhZGVkLmNvbS5nb29nbGUuY29tbW9uLmNvbGxlY3QuSW1tdXRhYmxlTWFwJFNlcmlhbGl6ZWRGb3JtAAAAAAAAAAACAAJMAARrZXlzdAASTGphdmEvbGFuZy9PYmplY3Q7TAAGdmFsdWVzcQB+ABh4cHVyABNbTGphdmEubGFuZy5PYmplY3Q7kM5YnxBzKWwCAAB4cAAAAAB1cQB+ABoAAAAAc3EAfgAAAAAAA3cEAAAAAnQAB2tleXdvcmRzcQB+AAx+cQB+ABB0AAZTVFJJTkd+cQB+ABR0AAdLZXl3b3JkcQB+ABl4dAAHYmFsYW5jZX5xAH4AEHQABExPTkd0AANhZ2VxAH4AJXh4\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"utcTimestamp\":*}},\"boost\":1.0}},{\"term\":{\"balance\":{\"value\":39225,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"firstname\",\"age\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n" | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please add the test:
`source=opensearch-sql_test_index_account | where firstname != ''`
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@songkant-aws can you add the above test case to the explain IT?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added the above test. However, the query itself is optimized into a bool filter query before it reaches the script pushdown code path. Ideally we should find a query for which the optimizer actually chooses script pushdown. I'm checking whether there is one.