Skip to content

Commit 9fe1bf4

Browse files
committed
use OnceLock for lazy initialization in Execute
1 parent d2b8da5 commit 9fe1bf4

File tree

1 file changed

+16
-24
lines changed

1 file changed

+16
-24
lines changed

datafusion/physical-plan/src/joins/hash_join.rs

Lines changed: 16 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
use std::fmt;
2121
use std::mem::size_of;
2222
use std::sync::atomic::{AtomicUsize, Ordering};
23-
use std::sync::Arc;
23+
use std::sync::{Arc, OnceLock};
2424
use std::task::Poll;
2525
use std::{any::Any, vec};
2626

@@ -517,7 +517,8 @@ pub struct HashJoinExec {
517517
/// Dynamic filter for pushing down to the probe side
518518
dynamic_filter: Arc<DynamicFilterPhysicalExpr>,
519519
/// Shared bounds accumulator for coordinating dynamic filter updates across partitions
520-
bounds_accumulator: Arc<SharedBoundsAccumulator>,
520+
/// Lazily initialized at execution time to use actual runtime partition counts
521+
bounds_accumulator: Arc<OnceLock<Arc<SharedBoundsAccumulator>>>,
521522
}
522523

523524
impl HashJoinExec {
@@ -566,12 +567,7 @@ impl HashJoinExec {
566567

567568
let dynamic_filter = Self::create_dynamic_filter(&on);
568569

569-
let bounds_accumulator =
570-
Arc::new(SharedBoundsAccumulator::new_from_partition_mode(
571-
partition_mode,
572-
left.as_ref(),
573-
right.as_ref(),
574-
));
570+
let bounds_accumulator = Arc::new(OnceLock::new());
575571

576572
Ok(HashJoinExec {
577573
left,
@@ -953,14 +949,7 @@ impl ExecutionPlan for HashJoinExec {
953949
///
954950
/// This method is called during query optimization when the optimizer creates new
955951
/// plan nodes. Importantly, it creates a fresh bounds_accumulator via `try_new`
956-
/// rather than cloning the existing one, because:
957-
///
958-
/// 1. The new child plans may have different partition counts, requiring a new
959-
/// bounds_accumulator with the correct total_partitions count
960-
/// 2. The accumulator contains execution state (completed_partitions, bounds)
961-
/// that should be reset for the new execution context
962-
/// 3. The dynamic_filter is preserved separately to maintain filter state
963-
/// across plan transformations
952+
/// rather than cloning the existing one because partitioning may have changed.
964953
fn with_new_children(
965954
self: Arc<Self>,
966955
children: Vec<Arc<dyn ExecutionPlan>>,
@@ -998,13 +987,7 @@ impl ExecutionPlan for HashJoinExec {
998987
null_equality: self.null_equality,
999988
cache: self.cache.clone(),
1000989
dynamic_filter: Self::create_dynamic_filter(&self.on),
1001-
bounds_accumulator: Arc::new(
1002-
SharedBoundsAccumulator::new_from_partition_mode(
1003-
self.mode,
1004-
self.left.as_ref(),
1005-
self.right.as_ref(),
1006-
),
1007-
),
990+
bounds_accumulator: Arc::new(OnceLock::new()),
1008991
}))
1009992
}
1010993

@@ -1094,6 +1077,15 @@ impl ExecutionPlan for HashJoinExec {
10941077

10951078
let batch_size = context.session_config().batch_size();
10961079

1080+
// Initialize bounds_accumulator lazily with runtime partition counts
1081+
let bounds_accumulator = Arc::clone(self.bounds_accumulator.get_or_init(|| {
1082+
Arc::new(SharedBoundsAccumulator::new_from_partition_mode(
1083+
self.mode,
1084+
self.left.as_ref(),
1085+
self.right.as_ref(),
1086+
))
1087+
}));
1088+
10971089
// we have the batches and the hash map with their keys. We can now create a stream
10981090
// over the right that uses this information to issue new batches.
10991091
let right_stream = self.right.execute(partition, context)?;
@@ -1122,7 +1114,7 @@ impl ExecutionPlan for HashJoinExec {
11221114
batch_size,
11231115
hashes_buffer: vec![],
11241116
right_side_ordered: self.right.output_ordering().is_some(),
1125-
bounds_accumulator: Arc::clone(&self.bounds_accumulator),
1117+
bounds_accumulator,
11261118
dynamic_filter: enable_dynamic_filter_pushdown
11271119
.then_some(Arc::clone(&self.dynamic_filter)),
11281120
}))

0 commit comments

Comments
 (0)