Closed
Description
Describe the bug
#12003 introduced a bug where sometimes the OFFSET
clause is accidentally removed
I am filing this ticket to track the regression / know it exists
To Reproduce
@wiedld created a reproducer here b6fd751
Add this to the order.slt
file:
# all results
query II
SELECT b, sum(a) FROM ordered_table GROUP BY b order by b desc;
----
3 25
2 25
1 0
0 0
# limit only
query II
SELECT b, sum(a) FROM ordered_table GROUP BY b order by b desc LIMIT 3;
----
3 25
2 25
1 0
# offset only
query II
SELECT b, sum(a) FROM ordered_table GROUP BY b order by b desc OFFSET 1;
----
2 25
1 0
0 0
# TODO: fix this to properly apply offset
# offset + limit
query II
SELECT b, sum(a) FROM ordered_table GROUP BY b order by b desc OFFSET 1 LIMIT 2;
----
3 25
2 25
1 0
# TODO: fix this to not remove the skip=1 during the limit pushdown
# Applying offset & limit when multiple streams from groupby
query TT
EXPLAIN SELECT b, sum(a) FROM ordered_table GROUP BY b order by b desc OFFSET 1 LIMIT 2;
----
logical_plan
01)Limit: skip=1, fetch=2
02)--Sort: ordered_table.b DESC NULLS FIRST, fetch=3
03)----Aggregate: groupBy=[[ordered_table.b]], aggr=[[sum(CAST(ordered_table.a AS Int64))]]
04)------TableScan: ordered_table projection=[a, b]
physical_plan
01)SortPreservingMergeExec: [b@0 DESC], fetch=3
02)--SortExec: TopK(fetch=3), expr=[b@0 DESC], preserve_partitioning=[true]
03)----AggregateExec: mode=FinalPartitioned, gby=[b@0 as b], aggr=[sum(ordered_table.a)]
04)------CoalesceBatchesExec: target_batch_size=8192
05)--------RepartitionExec: partitioning=Hash([b@0], 2), input_partitions=2
06)----------AggregateExec: mode=Partial, gby=[b@1 as b], aggr=[sum(ordered_table.a)]
07)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], has_header=true
# TODO: fix this to not remove the skip=4 during the limit pushdown
# Applying offset & limit when multiple streams from union
query TT
explain select * FROM (
select c FROM ordered_table
UNION ALL
select d FROM ordered_table
) order by 1 desc LIMIT 10 OFFSET 4;
----
logical_plan
01)Limit: skip=4, fetch=10
02)--Sort: ordered_table.c DESC NULLS FIRST, fetch=14
03)----Union
04)------Projection: CAST(ordered_table.c AS Int64) AS c
05)--------TableScan: ordered_table projection=[c]
06)------Projection: CAST(ordered_table.d AS Int64) AS c
07)--------TableScan: ordered_table projection=[d]
physical_plan
01)SortPreservingMergeExec: [c@0 DESC], fetch=14
02)--UnionExec
03)----SortExec: TopK(fetch=14), expr=[c@0 DESC], preserve_partitioning=[true]
04)------ProjectionExec: expr=[CAST(c@0 AS Int64) as c]
05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c], output_ordering=[c@0 ASC NULLS LAST], has_header=true
07)----SortExec: TopK(fetch=14), expr=[c@0 DESC], preserve_partitioning=[true]
08)------ProjectionExec: expr=[CAST(d@0 AS Int64) as c]
09)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
10)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[d], has_header=true
# ApplyingmLIMIT & OFFSET to subquery.
query III
select t1.b, c, c2 FROM (
select b, c FROM ordered_table ORDER BY b desc, c desc OFFSET 1 LIMIT 4
) as t1 INNER JOIN (
select b, c as c2 FROM ordered_table ORDER BY b desc, d desc OFFSET 1 LIMIT 4
) as t2
ON t1.b = t2.b
ORDER BY t1.b desc, c desc, c2 desc;
----
3 98 96
3 98 89
3 98 82
3 98 79
3 97 96
3 97 89
3 97 82
3 97 79
3 96 96
3 96 89
3 96 82
3 96 79
3 95 96
3 95 89
3 95 82
3 95 79
# TODO: fix this does not correctly work.
# Apply OFFSET & LIMIT to both parent and child (subquery).
query III
select t1.b, c, c2 FROM (
select b, c FROM ordered_table ORDER BY b desc, c desc OFFSET 1 LIMIT 4
) as t1 INNER JOIN (
select b, c as c2 FROM ordered_table ORDER BY b desc, d desc OFFSET 1 LIMIT 4
) as t2
ON t1.b = t2.b
ORDER BY t1.b desc, c desc, c2 desc
OFFSET 3 LIMIT 2;
----
3 99 96
3 99 89
3 99 87
3 99 82
3 99 79
Expected behavior
Query should not lose the offset
Additional context
No response