Skip to content

Commit 533ddcb

Browse files
authored
Provide documentation of exposed APIs to enable handling of type coercion at UNION plan construction. (#12142)
* chore(12105): enable union type-coercion by two approaches, using newly pub interfaces * chore(12105): update documentation to delineate between the interfaces involved in type coercion * chore(12105): update union() logical plan construction docs, to address type coercion
1 parent 0c75ddd commit 533ddcb

File tree

2 files changed

+28
-6
lines changed

2 files changed

+28
-6
lines changed

datafusion/expr/src/logical_plan/builder.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1331,7 +1331,17 @@ pub fn validate_unique_names<'a>(
13311331
})
13321332
}
13331333

1334-
/// Union two logical plans.
1334+
/// Union two [`LogicalPlan`]s.
1335+
///
1336+
/// Constructs the UNION plan, but does not perform type-coercion. Therefore the
1337+
/// subtree expressions will not be properly typed until the analyzer's type-coercion pass.
1338+
///
1339+
/// If a properly typed UNION plan is needed, refer to [`TypeCoercionRewriter::coerce_union`]
1340+
/// or, alternatively, merge the union input schema using [`coerce_union_schema`] and
1341+
/// apply the expression rewrite with [`coerce_plan_expr_for_schema`].
1342+
///
1343+
/// [`TypeCoercionRewriter::coerce_union`]: https://docs.rs/datafusion-optimizer/latest/datafusion_optimizer/analyzer/type_coercion/struct.TypeCoercionRewriter.html#method.coerce_union
1344+
/// [`coerce_union_schema`]: https://docs.rs/datafusion-optimizer/latest/datafusion_optimizer/analyzer/type_coercion/fn.coerce_union_schema.html
13351345
pub fn union(left_plan: LogicalPlan, right_plan: LogicalPlan) -> Result<LogicalPlan> {
13361346
// Temporarily use the schema from the left input and later rely on the analyzer to
13371347
// coerce the two schemas into a common one.

datafusion/optimizer/src/analyzer/type_coercion.rs

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ use datafusion_expr::{
5656
Projection, ScalarUDF, Union, WindowFrame, WindowFrameBound, WindowFrameUnits,
5757
};
5858

59+
/// Performs type coercion by determining the schema
60+
/// and performing the expression rewrites.
5961
#[derive(Default)]
6062
pub struct TypeCoercion {}
6163

@@ -128,16 +130,23 @@ fn analyze_internal(
128130
.map_data(|plan| plan.recompute_schema())
129131
}
130132

131-
pub(crate) struct TypeCoercionRewriter<'a> {
133+
/// Rewrite expressions to apply type coercion.
134+
pub struct TypeCoercionRewriter<'a> {
132135
pub(crate) schema: &'a DFSchema,
133136
}
134137

135138
impl<'a> TypeCoercionRewriter<'a> {
139+
/// Create a new [`TypeCoercionRewriter`] with a provided schema
140+
/// representing both the inputs and output of the [`LogicalPlan`] node.
136141
fn new(schema: &'a DFSchema) -> Self {
137142
Self { schema }
138143
}
139144

140-
fn coerce_plan(&mut self, plan: LogicalPlan) -> Result<LogicalPlan> {
145+
/// Coerce the [`LogicalPlan`].
146+
///
147+
/// Refer to [`TypeCoercionRewriter::coerce_join`] and [`TypeCoercionRewriter::coerce_union`]
148+
/// for the type-coercion approach applied to joins and unions, respectively.
149+
pub fn coerce_plan(&mut self, plan: LogicalPlan) -> Result<LogicalPlan> {
141150
match plan {
142151
LogicalPlan::Join(join) => self.coerce_join(join),
143152
LogicalPlan::Union(union) => Self::coerce_union(union),
@@ -153,7 +162,7 @@ impl<'a> TypeCoercionRewriter<'a> {
153162
///
154163
/// For example, on_exprs like `t1.a = t2.b AND t1.x = t2.y` will be stored
155164
/// as a list of `(t1.a, t2.b), (t1.x, t2.y)`
156-
fn coerce_join(&mut self, mut join: Join) -> Result<LogicalPlan> {
165+
pub fn coerce_join(&mut self, mut join: Join) -> Result<LogicalPlan> {
157166
join.on = join
158167
.on
159168
.into_iter()
@@ -176,7 +185,7 @@ impl<'a> TypeCoercionRewriter<'a> {
176185

177186
/// Coerce the union’s inputs to a common schema compatible with all inputs.
178187
/// This occurs after wildcard expansion and the coercion of the input expressions.
179-
fn coerce_union(union_plan: Union) -> Result<LogicalPlan> {
188+
pub fn coerce_union(union_plan: Union) -> Result<LogicalPlan> {
180189
let union_schema = Arc::new(coerce_union_schema(&union_plan.inputs)?);
181190
let new_inputs = union_plan
182191
.inputs
@@ -809,7 +818,10 @@ fn coerce_case_expression(case: Case, schema: &DFSchema) -> Result<Case> {
809818
}
810819

811820
/// Get a common schema that is compatible with all inputs of UNION.
812-
fn coerce_union_schema(inputs: &[Arc<LogicalPlan>]) -> Result<DFSchema> {
821+
///
822+
/// This function presumes that wildcard expansion is unneeded, or has already
823+
/// been applied.
824+
pub fn coerce_union_schema(inputs: &[Arc<LogicalPlan>]) -> Result<DFSchema> {
813825
let base_schema = inputs[0].schema();
814826
let mut union_datatypes = base_schema
815827
.fields()

0 commit comments

Comments
 (0)