Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.calcite.plan;

import org.apache.calcite.plan.Contexts;
import org.apache.calcite.plan.RelRule;
import org.apache.calcite.rel.core.RelFactories;
import org.apache.calcite.tools.RelBuilderFactory;
import org.immutables.value.Value;
import org.opensearch.sql.calcite.utils.CalciteToolsHelper;

public interface OpenSearchRuleConfig extends RelRule.Config {

/** Return a custom RelBuilderFactory for creating OpenSearchRelBuilder */
@Override
@Value.Default
default RelBuilderFactory relBuilderFactory() {
return CalciteToolsHelper.proto(Contexts.of(RelFactories.DEFAULT_STRUCT));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ public void apply(RelOptRuleCall call, LogicalAggregate aggregate, LogicalProjec

/** Rule configuration. */
@Value.Immutable
public interface Config extends RelRule.Config {
public interface Config extends OpenSearchRuleConfig {
Config GROUP_MERGE =
ImmutablePPLAggGroupMergeRule.Config.builder()
.build()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ private RexNode aliasMaybe(RelBuilder builder, RexNode node, String alias) {

/** Rule configuration. */
@Value.Immutable
public interface Config extends RelRule.Config {
public interface Config extends OpenSearchRuleConfig {
Config SUM_CONVERTER =
ImmutablePPLAggregateConvertRule.Config.builder()
.build()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@
import org.apache.calcite.tools.FrameworkConfig;
import org.apache.calcite.tools.Frameworks;
import org.apache.calcite.tools.RelBuilder;
import org.apache.calcite.tools.RelBuilderFactory;
import org.apache.calcite.tools.RelRunner;
import org.apache.calcite.util.Holder;
import org.apache.calcite.util.Util;
Expand Down Expand Up @@ -127,6 +128,10 @@ public static Connection connect(FrameworkConfig config, JavaTypeFactory typeFac
}
}

public static RelBuilderFactory proto(final Context context) {
return (cluster, schema) -> new OpenSearchRelBuilder(context, cluster, schema);
}

/**
* This method copied from {@link Frameworks#withPrepare(FrameworkConfig,
* Frameworks.BasePrepareAction)}. The purpose is the method {@link
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,12 @@ public void test() throws IOException {
}
logger.info("Running Query{}", i);
String ppl = sanitize(loadFromFile("clickbench/queries/q" + i + ".ppl"));
timing(summary, "q" + i, ppl);
// V2 gets unstable scripts, ignore them when comparing plan
if (isCalciteEnabled()) {
String expected = loadExpectedPlan("clickbench/q" + i + ".yaml");
assertYamlEqualsIgnoreId(expected, explainQueryYaml(ppl));
}
timing(summary, "q" + i, ppl);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1183,6 +1183,24 @@ public void testExplainSortOnMeasureMultiTermsWithScript() throws IOException {
+ " sort `count()`"));
}

@Test
public void testExplainSortOnMeasureComplex() throws IOException {
enabledOnlyWhenPushdownIsEnabled();
String expected = loadExpectedPlan("explain_agg_sort_on_measure_complex1.yaml");
assertYamlEqualsIgnoreId(
expected,
explainQueryYaml(
"source=opensearch-sql_test_index_account | stats bucket_nullable=false sum(balance),"
+ " count() as c, dc(employer) by state | sort - c"));
expected = loadExpectedPlan("explain_agg_sort_on_measure_complex2.yaml");
assertYamlEqualsIgnoreId(
expected,
explainQueryYaml(
"source=opensearch-sql_test_index_account | eval new_state = lower(state) | stats"
+ " bucket_nullable=false sum(balance), count(), dc(employer) as d by gender,"
+ " new_state | sort - d"));
}

@Test
public void testExplainCompositeMultiBucketsAutoDateThenSortOnMeasureNotPushdown()
throws IOException {
Expand Down Expand Up @@ -1235,15 +1253,15 @@ public void testExplainCompositeRangeAutoDateThenSortOnMeasureNotPushdown() thro
}

@Test
public void testExplainMultipleAggregatorsWithSortOnOneMeasureNotPushDown() throws IOException {
public void testExplainMultipleCollationsWithSortOnOneMeasureNotPushDown() throws IOException {
enabledOnlyWhenPushdownIsEnabled();
String expected =
loadExpectedPlan("explain_multiple_agg_with_sort_on_one_measure_not_push1.yaml");
assertYamlEqualsIgnoreId(
expected,
explainQueryYaml(
"source=opensearch-sql_test_index_account | stats bucket_nullable=false count() as c,"
+ " sum(balance) as s by state | sort c"));
+ " sum(balance) as s by state | sort c, state"));
expected = loadExpectedPlan("explain_multiple_agg_with_sort_on_one_measure_not_push2.yaml");
assertYamlEqualsIgnoreId(
expected,
Expand All @@ -1252,6 +1270,17 @@ public void testExplainMultipleAggregatorsWithSortOnOneMeasureNotPushDown() thro
+ " sum(balance) as s by state | sort c, s"));
}

@Test
public void testExplainSortOnMeasureMultiBucketsNotMultiTermsNotPushDown() throws IOException {
enabledOnlyWhenPushdownIsEnabled();
String expected = loadExpectedPlan("explain_agg_sort_on_measure_multi_buckets_not_pushed.yaml");
assertYamlEqualsIgnoreId(
expected,
explainQueryYaml(
"source=opensearch-sql_test_index_account | stats bucket_nullable=false count() as c,"
+ " sum(balance) as s by state, span(age, 5) | sort c"));
}

@Test
public void testExplainEvalMax() throws IOException {
String expected = loadExpectedPlan("explain_eval_max.json");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1174,4 +1174,55 @@ public void testStatsSpanSortOnMeasureMultiTermsWithScript() throws IOException
resetQueryBucketSize();
}
}

@Test
public void testStatsSortOnMeasureComplex() throws IOException {
try {
setQueryBucketSize(5);
JSONObject response =
executeQuery(
String.format(
"source=%s | stats bucket_nullable=false sum(balance), count() as c, dc(employer)"
+ " as d by state | sort - c | head 5",
TEST_INDEX_ACCOUNT));
verifySchema(
response,
schema("sum(balance)", null, "bigint"),
schema("c", null, "bigint"),
schema("d", null, "bigint"),
schema("state", null, "string"));
System.out.println(response);
verifyDataRows(
response,
rows(782199, 30, 30, "TX"),
rows(732523, 28, 28, "MD"),
rows(657957, 27, 27, "ID"),
rows(541575, 25, 25, "ME"),
rows(643489, 25, 25, "AL"));
response =
executeQuery(
String.format(
"source=%s | eval new_state = lower(state) | stats bucket_nullable=false"
+ " sum(balance), count() as c, dc(employer) as d by gender, new_state | sort"
+ " - d | head 5",
TEST_INDEX_ACCOUNT));
verifySchema(
response,
schema("sum(balance)", null, "bigint"),
schema("c", null, "bigint"),
schema("d", null, "bigint"),
schema("gender", null, "string"),
schema("new_state", null, "string"));
System.out.println(response);
verifyDataRows(
response,
rows(484567, 18, 18, "M", "md"),
rows(376394, 17, 17, "M", "id"),
rows(505688, 17, 17, "F", "tx"),
rows(375409, 16, 16, "M", "me"),
rows(432776, 15, 15, "M", "ok"));
} finally {
resetQueryBucketSize();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,4 @@ calcite:
LogicalFilter(condition=[IS NOT NULL($68)])
CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
physical: |
EnumerableLimit(fetch=[10000])
EnumerableLimit(fetch=[10])
EnumerableSort(sort0=[$1], dir0=[DESC-nulls-last])
CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum(AdvEngineID)=SUM($0),c=COUNT(),avg(ResolutionWidth)=AVG($2),dc(UserID)=COUNT(DISTINCT $3)), PROJECT->[sum(AdvEngineID), c, avg(ResolutionWidth), dc(UserID), RegionID]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"RegionID":{"terms":{"field":"RegionID","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"sum(AdvEngineID)":{"sum":{"field":"AdvEngineID"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"dc(UserID)":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},sum(AdvEngineID)=SUM($0),c=COUNT(),avg(ResolutionWidth)=AVG($2),dc(UserID)=COUNT(DISTINCT $3)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[sum(AdvEngineID), c, avg(ResolutionWidth), dc(UserID), RegionID], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"RegionID":{"terms":{"field":"RegionID","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(AdvEngineID)":{"sum":{"field":"AdvEngineID"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"dc(UserID)":{"cardinality":{"field":"UserID"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,4 @@ calcite:
LogicalFilter(condition=[AND(ILIKE($97, '%Google%', '\'), <>($63, ''), NOT(ILIKE($26, '%.google.%', '\')))])
CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
physical: |
EnumerableLimit(fetch=[10000])
EnumerableLimit(fetch=[10])
EnumerableSort(sort0=[$0], dir0=[DESC-nulls-last])
CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[URL, SearchPhrase, UserID, Title], FILTER->AND(ILIKE($3, '%Google%', '\'), <>($1, ''), NOT(ILIKE($0, '%.google.%', '\'))), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT(),dc(UserID)=COUNT(DISTINCT $1)), PROJECT->[c, dc(UserID), SearchPhrase]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"Title":{"wildcard":"*Google*","case_insensitive":true,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"bool":{"must_not":[{"wildcard":{"URL":{"wildcard":"*.google.*","case_insensitive":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["URL","SearchPhrase","UserID","Title"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"SearchPhrase":{"terms":{"field":"SearchPhrase","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"dc(UserID)":{"cardinality":{"field":"UserID"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[URL, SearchPhrase, UserID, Title], FILTER->AND(ILIKE($3, '%Google%', '\'), <>($1, ''), NOT(ILIKE($0, '%.google.%', '\'))), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},c=COUNT(),dc(UserID)=COUNT(DISTINCT $1)), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[c, dc(UserID), SearchPhrase], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"Title":{"wildcard":"*Google*","case_insensitive":true,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"bool":{"must_not":[{"wildcard":{"URL":{"wildcard":"*.google.*","case_insensitive":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["URL","SearchPhrase","UserID","Title"],"excludes":[]},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"dc(UserID)":{"cardinality":{"field":"UserID"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,4 @@ calcite:
LogicalFilter(condition=[<>($63, '')])
CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
physical: |
EnumerableLimit(fetch=[10000])
EnumerableLimit(fetch=[10])
EnumerableSort(sort0=[$0], dir0=[DESC-nulls-last])
CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[SearchPhrase, SearchEngineID, IsRefresh, ClientIP, ResolutionWidth], FILTER->AND(<>($0, ''), IS NOT NULL($1), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($3)), PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), SearchEngineID, ClientIP]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"SearchEngineID","boost":1.0}},{"exists":{"field":"ClientIP","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["SearchPhrase","SearchEngineID","IsRefresh","ClientIP","ResolutionWidth"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"SearchEngineID":{"terms":{"field":"SearchEngineID","missing_bucket":false,"order":"asc"}}},{"ClientIP":{"terms":{"field":"ClientIP","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[SearchPhrase, SearchEngineID, IsRefresh, ClientIP, ResolutionWidth], FILTER->AND(<>($0, ''), IS NOT NULL($1), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($3)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), SearchEngineID, ClientIP], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"SearchEngineID","boost":1.0}},{"exists":{"field":"ClientIP","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["SearchPhrase","SearchEngineID","IsRefresh","ClientIP","ResolutionWidth"],"excludes":[]},"aggregations":{"SearchEngineID|ClientIP":{"multi_terms":{"terms":[{"field":"SearchEngineID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,4 @@ calcite:
LogicalFilter(condition=[<>($63, '')])
CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
physical: |
EnumerableLimit(fetch=[10000])
EnumerableLimit(fetch=[10])
EnumerableSort(sort0=[$0], dir0=[DESC-nulls-last])
CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[WatchID, SearchPhrase, IsRefresh, ClientIP, ResolutionWidth], FILTER->AND(<>($1, ''), IS NOT NULL($0), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($3)), PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), WatchID, ClientIP]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"WatchID","boost":1.0}},{"exists":{"field":"ClientIP","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["WatchID","SearchPhrase","IsRefresh","ClientIP","ResolutionWidth"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":10000,"sources":[{"WatchID":{"terms":{"field":"WatchID","missing_bucket":false,"order":"asc"}}},{"ClientIP":{"terms":{"field":"ClientIP","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[PROJECT->[WatchID, SearchPhrase, IsRefresh, ClientIP, ResolutionWidth], FILTER->AND(<>($1, ''), IS NOT NULL($0), IS NOT NULL($3)), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},c=COUNT(),sum(IsRefresh)=SUM($2),avg(ResolutionWidth)=AVG($3)), SORT_AGG_METRICS->[2 DESC LAST], PROJECT->[c, sum(IsRefresh), avg(ResolutionWidth), WatchID, ClientIP], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"exists":{"field":"WatchID","boost":1.0}},{"exists":{"field":"ClientIP","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["WatchID","SearchPhrase","IsRefresh","ClientIP","ResolutionWidth"],"excludes":[]},"aggregations":{"WatchID|ClientIP":{"multi_terms":{"terms":[{"field":"WatchID"},{"field":"ClientIP"}],"size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"sum(IsRefresh)":{"sum":{"field":"IsRefresh"}},"avg(ResolutionWidth)":{"avg":{"field":"ResolutionWidth"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
Loading
Loading