Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion ballista/rust/core/proto/ballista.proto
Original file line number Diff line number Diff line change
Expand Up @@ -565,12 +565,17 @@ message CsvScanExecNode {
repeated string filename = 8;
}

// Partitioning mode of a hash join, carried in HashJoinExecNode.partition_mode.
// The Rust serde code converts each value to and from the same-named variant of
// DataFusion's `PartitionMode` (CollectLeft / Partitioned).
enum PartitionMode {
// Maps to `PartitionMode::CollectLeft`.
// NOTE(review): as the zero value, this is presumably what a message that omits
// `partition_mode` decodes to under proto3 defaults — confirm the file's syntax version.
COLLECT_LEFT = 0;
// Maps to `PartitionMode::Partitioned`.
PARTITIONED = 1;
}

// Serialized form of a hash join physical plan node. The deserializer feeds these
// fields, in order, to `HashJoinExec::try_new(left, right, on, &join_type, partition_mode)`.
message HashJoinExecNode {
// Left input plan.
PhysicalPlanNode left = 1;
// Right input plan.
PhysicalPlanNode right = 2;
// Join conditions, passed as the `on` argument of the join.
repeated JoinOn on = 3;
// Kind of join to perform.
JoinType join_type = 4;

// Partitioning mode of the join; values outside the PartitionMode enum are
// rejected by the deserializer with an "unknown PartitionMode" error.
// NOTE(review): field number 5 is not used in this message as shown — confirm
// whether it was used previously and should be marked `reserved`.
PartitionMode partition_mode = 6;
}

message PhysicalColumn {
Expand Down
14 changes: 13 additions & 1 deletion ballista/rust/core/src/serde/physical_plan/from_proto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -356,12 +356,24 @@ impl TryInto<Arc<dyn ExecutionPlan>> for &protobuf::PhysicalPlanNode {
protobuf::JoinType::Semi => JoinType::Semi,
protobuf::JoinType::Anti => JoinType::Anti,
};
let partition_mode =
protobuf::PartitionMode::from_i32(hashjoin.partition_mode)
.ok_or_else(|| {
proto_error(format!(
"Received a HashJoinNode message with unknown PartitionMode {}",
hashjoin.partition_mode
))
})?;
let partition_mode = match partition_mode {
protobuf::PartitionMode::CollectLeft => PartitionMode::CollectLeft,
protobuf::PartitionMode::Partitioned => PartitionMode::Partitioned,
};
Ok(Arc::new(HashJoinExec::try_new(
left,
right,
on,
&join_type,
PartitionMode::CollectLeft,
partition_mode,
)?))
}
PhysicalPlanType::ShuffleReader(shuffle_reader) => {
Expand Down
30 changes: 23 additions & 7 deletions ballista/rust/core/src/serde/physical_plan/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,29 @@ mod roundtrip_tests {
Column::new("col", schema_right.index_of("col")?),
)];

roundtrip_test(Arc::new(HashJoinExec::try_new(
Arc::new(EmptyExec::new(false, Arc::new(schema_left))),
Arc::new(EmptyExec::new(false, Arc::new(schema_right))),
on,
&JoinType::Inner,
PartitionMode::CollectLeft,
)?))
let schema_left = Arc::new(schema_left);
let schema_right = Arc::new(schema_right);
for join_type in &[
JoinType::Inner,
JoinType::Left,
JoinType::Right,
JoinType::Full,
JoinType::Anti,
JoinType::Semi,
] {
for partition_mode in
&[PartitionMode::Partitioned, PartitionMode::CollectLeft]
{
roundtrip_test(Arc::new(HashJoinExec::try_new(
Arc::new(EmptyExec::new(false, schema_left.clone())),
Arc::new(EmptyExec::new(false, schema_right.clone())),
on.clone(),
&join_type,
*partition_mode,
)?))?;
}
}
Ok(())
}

#[test]
Expand Down
7 changes: 6 additions & 1 deletion ballista/rust/core/src/serde/physical_plan/to_proto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ use datafusion::physical_plan::expressions::{
use datafusion::physical_plan::expressions::{CastExpr, TryCastExpr};
use datafusion::physical_plan::filter::FilterExec;
use datafusion::physical_plan::hash_aggregate::AggregateMode;
use datafusion::physical_plan::hash_join::HashJoinExec;
use datafusion::physical_plan::hash_join::{HashJoinExec, PartitionMode};
use datafusion::physical_plan::hash_utils::JoinType;
use datafusion::physical_plan::limit::{GlobalLimitExec, LocalLimitExec};
use datafusion::physical_plan::parquet::ParquetExec;
Expand Down Expand Up @@ -143,13 +143,18 @@ impl TryInto<protobuf::PhysicalPlanNode> for Arc<dyn ExecutionPlan> {
JoinType::Semi => protobuf::JoinType::Semi,
JoinType::Anti => protobuf::JoinType::Anti,
};
let partition_mode = match exec.partition_mode() {
PartitionMode::CollectLeft => protobuf::PartitionMode::CollectLeft,
PartitionMode::Partitioned => protobuf::PartitionMode::Partitioned,
};
Ok(protobuf::PhysicalPlanNode {
physical_plan_type: Some(PhysicalPlanType::HashJoin(Box::new(
protobuf::HashJoinExecNode {
left: Some(Box::new(left)),
right: Some(Box::new(right)),
on,
join_type: join_type.into(),
partition_mode: partition_mode.into(),
},
))),
})
Expand Down
5 changes: 5 additions & 0 deletions datafusion/src/physical_plan/hash_join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,11 @@ impl HashJoinExec {
&self.join_type
}

/// The partitioning mode of this hash join.
///
/// Returns a shared reference to the `mode` field, so callers can `match` on
/// the result without taking ownership of it.
pub fn partition_mode(&self) -> &PartitionMode {
&self.mode
}

/// Calculates column indices and left/right placement on input / output schemas and jointype
fn column_indices_from_schema(&self) -> ArrowResult<Vec<ColumnIndex>> {
let (primary_is_left, primary_schema, secondary_schema) = match self.join_type {
Expand Down