-
Notifications
You must be signed in to change notification settings - Fork 2k
perf: Implement physical execution of uncorrelated scalar subqueries #21240
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
4a3e553
6b4f5c0
1412ab1
cedfa5c
d80569f
9f606fb
b07491b
27a1ac2
bce0a6d
7071001
b9bce91
7c965aa
09f167a
54a9f79
b979e3d
2c256e7
99d9bcf
9a11d62
9b217ca
5aef67e
3d0b99f
f99ded5
b02abf8
3971312
64e9f34
26d8acb
d2af491
f9c9d5d
92e6054
6857966
6a4f524
7adb788
670139c
1239e3a
7bb6959
4c824a4
ee58247
f582628
4e7442c
a2087d0
dc4ca31
0416b0d
b9d307b
e787873
ce1e3c0
c32edba
50d0ef8
5935028
4b60787
177a190
cf582e3
2245153
19f796c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -136,9 +136,11 @@ fn optimize_projections( | |
| // their parents' required indices. | ||
| match plan { | ||
| LogicalPlan::Projection(proj) => { | ||
| return merge_consecutive_projections(proj)?.transform_data(|proj| { | ||
| rewrite_projection_given_requirements(proj, config, &indices) | ||
| }); | ||
| return merge_consecutive_projections(proj)? | ||
| .transform_data(|proj| { | ||
| rewrite_projection_given_requirements(proj, config, &indices) | ||
| })? | ||
| .transform_data(|plan| optimize_subqueries(plan, config)); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is unfortuante as it seems like we'll have to add this extra traversal for all passes that want to recurse into subqueries. Hwever, I think that is not introduced by this PR so we can perhaps deal with it in a follow on |
||
| } | ||
| LogicalPlan::Aggregate(aggregate) => { | ||
| // Split parent requirements to GROUP BY and aggregate sections: | ||
|
|
@@ -210,7 +212,8 @@ fn optimize_projections( | |
| new_aggr_expr, | ||
| ) | ||
| .map(LogicalPlan::Aggregate) | ||
| }); | ||
| })? | ||
| .transform_data(|plan| optimize_subqueries(plan, config)); | ||
| } | ||
| LogicalPlan::Window(window) => { | ||
| let input_schema = Arc::clone(window.input.schema()); | ||
|
|
@@ -250,7 +253,8 @@ fn optimize_projections( | |
| .map(LogicalPlan::Window) | ||
| .map(Transformed::yes) | ||
| } | ||
| }); | ||
| })? | ||
| .transform_data(|plan| optimize_subqueries(plan, config)); | ||
| } | ||
| LogicalPlan::TableScan(table_scan) => { | ||
| let TableScan { | ||
|
|
@@ -271,7 +275,8 @@ fn optimize_projections( | |
| let new_scan = | ||
| TableScan::try_new(table_name, source, Some(projection), filters, fetch)?; | ||
|
|
||
| return Ok(Transformed::yes(LogicalPlan::TableScan(new_scan))); | ||
| return Transformed::yes(LogicalPlan::TableScan(new_scan)) | ||
| .transform_data(|plan| optimize_subqueries(plan, config)); | ||
| } | ||
| // Other node types are handled below | ||
| _ => {} | ||
|
|
@@ -463,6 +468,9 @@ fn optimize_projections( | |
| ) | ||
| })?; | ||
|
|
||
| let transformed_plan = | ||
| transformed_plan.transform_data(|plan| optimize_subqueries(plan, config))?; | ||
|
|
||
| // If any of the children are transformed, we need to potentially update the plan's schema | ||
| if transformed_plan.transformed { | ||
| transformed_plan.map_data(|plan| plan.recompute_schema()) | ||
|
|
@@ -473,6 +481,19 @@ fn optimize_projections( | |
|
|
||
| /// Merges consecutive projections. | ||
| /// | ||
| /// Optimizes uncorrelated subquery plans embedded in expressions of the given | ||
| /// plan node (e.g., `Expr::ScalarSubquery`). `map_children` only visits direct | ||
| /// plan inputs, so subqueries must be handled separately. | ||
| fn optimize_subqueries( | ||
| plan: LogicalPlan, | ||
| config: &dyn OptimizerConfig, | ||
| ) -> Result<Transformed<LogicalPlan>> { | ||
| plan.map_uncorrelated_subqueries(|subquery_plan| { | ||
| let indices = RequiredIndices::new_for_all_exprs(&subquery_plan); | ||
| optimize_projections(subquery_plan, config, indices) | ||
| }) | ||
| } | ||
|
|
||
| /// Given a projection `proj`, this function attempts to merge it with a previous | ||
| /// projection if it exists and if merging is beneficial. Merging is considered | ||
| /// beneficial when expressions in the current projection are non-trivial and | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
do we really need to up the limit? this repo gets checked out a lot
What is so large that required increasing to 2MB?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I changed this because
pbjson.rsstarted to exceed the limit (this PR only increases its size slightly, but it is only a hair under 1MB in mainline).We could certainly make the limit tighter (e.g., 1.2MB) -- or if there's a different approach you prefer, lmk.