
Commit 9aacdee

[MINOR]: Do not introduce unnecessary repartition when row count is 1. (#7832)
* Initial commit
* Fix failing tests
* More idiomatic expressions
* Update tests, use batch size during partition benefit check
* Fix failing tests
* is_exact when row count is 1

Co-authored-by: Mehmet Ozan Kabak <ozankabak@gmail.com>
1 parent cb2d03c commit 9aacdee

14 files changed (+270, -214 lines)

datafusion/core/src/datasource/listing/table.rs

Lines changed: 10 additions & 3 deletions
```diff
@@ -1615,10 +1615,12 @@ mod tests {
 
     #[tokio::test]
     async fn test_insert_into_append_new_json_files() -> Result<()> {
+        let mut config_map: HashMap<String, String> = HashMap::new();
+        config_map.insert("datafusion.execution.batch_size".into(), "1".into());
         helper_test_append_new_files_to_table(
             FileType::JSON,
             FileCompressionType::UNCOMPRESSED,
-            None,
+            Some(config_map),
         )
         .await?;
         Ok(())
@@ -1637,21 +1639,25 @@ mod tests {
 
     #[tokio::test]
     async fn test_insert_into_append_new_csv_files() -> Result<()> {
+        let mut config_map: HashMap<String, String> = HashMap::new();
+        config_map.insert("datafusion.execution.batch_size".into(), "1".into());
         helper_test_append_new_files_to_table(
             FileType::CSV,
             FileCompressionType::UNCOMPRESSED,
-            None,
+            Some(config_map),
         )
         .await?;
         Ok(())
     }
 
     #[tokio::test]
     async fn test_insert_into_append_new_parquet_files_defaults() -> Result<()> {
+        let mut config_map: HashMap<String, String> = HashMap::new();
+        config_map.insert("datafusion.execution.batch_size".into(), "1".into());
         helper_test_append_new_files_to_table(
             FileType::PARQUET,
             FileCompressionType::UNCOMPRESSED,
-            None,
+            Some(config_map),
         )
         .await?;
         Ok(())
@@ -1838,6 +1844,7 @@ mod tests {
             "datafusion.execution.parquet.write_batch_size".into(),
             "5".into(),
         );
+        config_map.insert("datafusion.execution.batch_size".into(), "1".into());
         helper_test_append_new_files_to_table(
             FileType::PARQUET,
             FileCompressionType::UNCOMPRESSED,
```
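These tests pin `datafusion.execution.batch_size` to 1 so that single-row inserts still exercise the repartitioning path. Outside the test helper, the same option can be set on a session; a minimal sketch, assuming the `SessionConfig::with_batch_size` and `SessionContext::with_config` APIs of this era of DataFusion:

```rust
use datafusion::prelude::{SessionConfig, SessionContext};

fn main() {
    // Equivalent to inserting ("datafusion.execution.batch_size", "1")
    // into the test helper's config map:
    let config = SessionConfig::new().with_batch_size(1);
    let ctx = SessionContext::with_config(config);
    // Plans built through `ctx` now treat any input known to fit in a
    // single batch as too small to benefit from repartitioning.
    let _ = ctx;
}
```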

datafusion/core/src/physical_optimizer/enforce_distribution.rs

Lines changed: 22 additions & 12 deletions
```diff
@@ -1021,6 +1021,7 @@ fn add_hash_on_top(
     // until Repartition(Hash).
     dist_onward: &mut Option<ExecTree>,
     input_idx: usize,
+    repartition_beneficial_stats: bool,
 ) -> Result<Arc<dyn ExecutionPlan>> {
     if n_target == input.output_partitioning().partition_count() && n_target == 1 {
         // In this case adding a hash repartition is unnecessary as the hash
@@ -1044,9 +1045,13 @@ fn add_hash_on_top(
     // - Usage of order preserving variants is not desirable (per the flag
     //   `config.optimizer.bounded_order_preserving_variants`).
     let should_preserve_ordering = input.output_ordering().is_some();
-    // Since hashing benefits from partitioning, add a round-robin repartition
-    // before it:
-    let mut new_plan = add_roundrobin_on_top(input, n_target, dist_onward, 0)?;
+    let mut new_plan = if repartition_beneficial_stats {
+        // Since hashing benefits from partitioning, add a round-robin repartition
+        // before it:
+        add_roundrobin_on_top(input, n_target, dist_onward, 0)?
+    } else {
+        input
+    };
     new_plan = Arc::new(
         RepartitionExec::try_new(new_plan, Partitioning::Hash(hash_exprs, n_target))?
             .with_preserve_order(should_preserve_ordering),
@@ -1223,6 +1228,7 @@ fn ensure_distribution(
     let enable_round_robin = config.optimizer.enable_round_robin_repartition;
     let repartition_file_scans = config.optimizer.repartition_file_scans;
     let repartition_file_min_size = config.optimizer.repartition_file_min_size;
+    let batch_size = config.execution.batch_size;
     let is_unbounded = unbounded_output(&dist_context.plan);
     // Use order preserving variants either of the conditions true
     // - it is desired according to config
@@ -1233,13 +1239,7 @@ fn ensure_distribution(
     if dist_context.plan.children().is_empty() {
         return Ok(Transformed::No(dist_context));
     }
-    // Don't need to apply when the returned row count is not greater than 1:
-    let stats = dist_context.plan.statistics();
-    let mut repartition_beneficial_stat = true;
-    if stats.is_exact {
-        repartition_beneficial_stat =
-            stats.num_rows.map(|num_rows| num_rows > 1).unwrap_or(true);
-    }
+
     // Remove unnecessary repartition from the physical plan if any
     let DistributionContext {
         mut plan,
@@ -1263,7 +1263,6 @@ fn ensure_distribution(
             plan = updated_window;
         }
     };
-
     let n_children = plan.children().len();
     // This loop iterates over all the children to:
     // - Increase parallelism for every child if it is beneficial.
@@ -1289,9 +1288,19 @@ fn ensure_distribution(
                 maintains,
                 child_idx,
             )| {
+                // Don't need to apply when the returned row count is not greater than 1:
+                let stats = child.statistics();
+                let repartition_beneficial_stats = if stats.is_exact {
+                    stats
+                        .num_rows
+                        .map(|num_rows| num_rows > batch_size)
+                        .unwrap_or(true)
+                } else {
+                    true
+                };
                 if enable_round_robin
                     // Operator benefits from partitioning (e.g. filter):
-                    && (would_benefit && repartition_beneficial_stat)
+                    && (would_benefit && repartition_beneficial_stats)
                     // Unless partitioning doesn't increase the partition count, it is not beneficial:
                     && child.output_partitioning().partition_count() < target_partitions
                 {
@@ -1340,6 +1349,7 @@ fn ensure_distribution(
                         target_partitions,
                         dist_onward,
                         child_idx,
+                        repartition_beneficial_stats,
                     )?;
                 }
                 Distribution::UnspecifiedDistribution => {}
```
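The per-child check above is the heart of the change: with exact statistics, repartitioning is only considered beneficial when the row count exceeds the configured batch size; without exact statistics the optimizer stays optimistic. Moving the check from the plan root into the closure also means each child is judged on its own statistics. A self-contained sketch of that predicate (simplified types, not DataFusion's own):

```rust
/// Simplified stand-in for DataFusion's plan statistics.
struct Stats {
    is_exact: bool,
    num_rows: Option<usize>,
}

/// Mirrors the diff's logic: only skip repartitioning when we *know*
/// (exact stats) that the input fits within a single batch.
fn repartition_beneficial(stats: &Stats, batch_size: usize) -> bool {
    if stats.is_exact {
        stats.num_rows.map(|n| n > batch_size).unwrap_or(true)
    } else {
        // Inexact statistics: assume repartitioning helps.
        true
    }
}

fn main() {
    let batch_size = 8192;
    // A known single-row input is not worth repartitioning:
    assert!(!repartition_beneficial(
        &Stats { is_exact: true, num_rows: Some(1) },
        batch_size
    ));
    // A large exact row count still benefits:
    assert!(repartition_beneficial(
        &Stats { is_exact: true, num_rows: Some(100_000) },
        batch_size
    ));
    // Inexact or unknown counts default to repartitioning:
    assert!(repartition_beneficial(
        &Stats { is_exact: false, num_rows: Some(1) },
        batch_size
    ));
}
```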

datafusion/core/tests/sql/order.rs

Lines changed: 4 additions & 4 deletions
```diff
@@ -209,16 +209,16 @@ ORDER BY 1, 2;
 "    AggregateExec: mode=FinalPartitioned, gby=[Int64(0)@0 as Int64(0), t@1 as t], aggr=[]",
 "      CoalesceBatchesExec: target_batch_size=8192",
 "        RepartitionExec: partitioning=Hash([Int64(0)@0, t@1], 2), input_partitions=2",
-"          AggregateExec: mode=Partial, gby=[0 as Int64(0), t@0 as t], aggr=[]",
-"            RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1",
+"          RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1",
+"            AggregateExec: mode=Partial, gby=[0 as Int64(0), t@0 as t], aggr=[]",
 "              ProjectionExec: expr=[column1@0 as t]",
 "                ValuesExec",
 "  ProjectionExec: expr=[Int64(1)@0 as m, t@1 as t]",
 "    AggregateExec: mode=FinalPartitioned, gby=[Int64(1)@0 as Int64(1), t@1 as t], aggr=[]",
 "      CoalesceBatchesExec: target_batch_size=8192",
 "        RepartitionExec: partitioning=Hash([Int64(1)@0, t@1], 2), input_partitions=2",
-"          AggregateExec: mode=Partial, gby=[1 as Int64(1), t@0 as t], aggr=[]",
-"            RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1",
+"          RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1",
+"            AggregateExec: mode=Partial, gby=[1 as Int64(1), t@0 as t], aggr=[]",
 "              ProjectionExec: expr=[column1@0 as t]",
 "                ValuesExec",
 ];
```

datafusion/physical-plan/src/aggregates/mod.rs

Lines changed: 15 additions & 6 deletions
```diff
@@ -1021,12 +1021,21 @@ impl ExecutionPlan for AggregateExec {
                     ..Default::default()
                 }
             }
-            _ => Statistics {
-                // the output row count is surely not larger than its input row count
-                num_rows: self.input.statistics().num_rows,
-                is_exact: false,
-                ..Default::default()
-            },
+            _ => {
+                let input_stats = self.input.statistics();
+                // Input statistics is exact and number of rows not greater than 1:
+                let is_exact = input_stats.is_exact
+                    && (input_stats
+                        .num_rows
+                        .map(|num_rows| num_rows == 1)
+                        .unwrap_or(false));
+                Statistics {
+                    // the output row count is surely not larger than its input row count
+                    num_rows: self.input.statistics().num_rows,
+                    is_exact,
+                    ..Default::default()
+                }
+            }
         }
     }
 }
```
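The new arm only marks aggregate output statistics as exact when the input statistics are exact and report exactly one row, since a one-row input necessarily produces a one-row aggregate; this exactness is what lets `ensure_distribution` trust the count and skip repartitioning. A minimal sketch of the rule in isolation (the helper name is illustrative, not DataFusion API):

```rust
// Hypothetical helper isolating the exactness rule from the diff above.
fn aggregate_stats_exact(input_is_exact: bool, input_num_rows: Option<usize>) -> bool {
    input_is_exact && input_num_rows.map(|n| n == 1).unwrap_or(false)
}

fn main() {
    assert!(aggregate_stats_exact(true, Some(1)));   // exact, single row
    assert!(!aggregate_stats_exact(true, Some(2)));  // exact, but more rows
    assert!(!aggregate_stats_exact(false, Some(1))); // inexact input
    assert!(!aggregate_stats_exact(true, None));     // unknown row count
}
```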

datafusion/sqllogictest/test_files/aggregate.slt

Lines changed: 10 additions & 10 deletions
```diff
@@ -2342,8 +2342,8 @@ GlobalLimitExec: skip=0, fetch=4
 ------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)]
 --------CoalesceBatchesExec: target_batch_size=8192
 ----------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4
-------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)]
---------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+--------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)]
 ----------------MemoryExec: partitions=1, partition_sizes=[1]
 
 
@@ -2397,8 +2397,8 @@ GlobalLimitExec: skip=0, fetch=4
 ------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)], lim=[4]
 --------CoalesceBatchesExec: target_batch_size=8192
 ----------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4
-------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)], lim=[4]
---------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+--------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)], lim=[4]
 ----------------MemoryExec: partitions=1, partition_sizes=[1]
 
 query TT
@@ -2416,8 +2416,8 @@ GlobalLimitExec: skip=0, fetch=4
 ------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[MIN(traces.timestamp)]
 --------CoalesceBatchesExec: target_batch_size=8192
 ----------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4
-------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[MIN(traces.timestamp)]
---------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+--------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[MIN(traces.timestamp)]
 ----------------MemoryExec: partitions=1, partition_sizes=[1]
 
 query TT
@@ -2435,8 +2435,8 @@ GlobalLimitExec: skip=0, fetch=4
 ------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)]
 --------CoalesceBatchesExec: target_batch_size=8192
 ----------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4
-------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)]
---------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+--------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)]
 ----------------MemoryExec: partitions=1, partition_sizes=[1]
 
 query TT
@@ -2454,8 +2454,8 @@ GlobalLimitExec: skip=0, fetch=4
 ------AggregateExec: mode=FinalPartitioned, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)]
 --------CoalesceBatchesExec: target_batch_size=8192
 ----------RepartitionExec: partitioning=Hash([trace_id@0], 4), input_partitions=4
-------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)]
---------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+--------------AggregateExec: mode=Partial, gby=[trace_id@0 as trace_id], aggr=[MAX(traces.timestamp)]
 ----------------MemoryExec: partitions=1, partition_sizes=[1]
 
 query TI
```

datafusion/sqllogictest/test_files/copy.slt

Lines changed: 1 addition & 1 deletion
```diff
@@ -33,7 +33,7 @@ CopyTo: format=parquet output_url=test_files/scratch/copy/table single_file_outp
 --TableScan: source_table projection=[col1, col2]
 physical_plan
 InsertExec: sink=ParquetSink(writer_mode=PutMultipart, file_groups=[])
---MemoryExec: partitions=4, partition_sizes=[1, 0, 0, 0]
+--MemoryExec: partitions=1, partition_sizes=[1]
 
 # Error case
 query error DataFusion error: Invalid or Unsupported Configuration: Format not explicitly set and unable to get file extension!
```
