Skip to content

Commit 0959703

Browse files
joseph-isaacsa10y
authored andcommitted
feat[vortex-expr]: add a is_null_sensitive expression analysis (#5528)
Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk>
1 parent 13b0e54 commit 0959703

File tree

28 files changed

+319
-34
lines changed

28 files changed

+319
-34
lines changed

vortex-array/src/arrays/struct_/vtable/reduce.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@ use crate::IntoArray;
1616
use crate::arrays::ExprArray;
1717
use crate::arrays::StructArray;
1818
use crate::expr::Expression;
19+
use crate::expr::analysis::immediate_access::annotate_scope_access;
1920
use crate::expr::col;
2021
use crate::expr::root;
2122
use crate::expr::session::ExprSession;
2223
use crate::expr::transform::ExprOptimizer;
2324
use crate::expr::transform::PartitionedExpr;
24-
use crate::expr::transform::immediate_access::annotate_scope_access;
2525
use crate::expr::transform::partition;
2626
use crate::expr::transform::replace;
2727
use crate::expr::transform::replace_root_fields;

vortex-array/src/expr/analysis.rs

Lines changed: 0 additions & 20 deletions
This file was deleted.

vortex-array/src/expr/transform/immediate_access.rs renamed to vortex-array/src/expr/analysis/immediate_access.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@ use vortex_error::VortexExpect;
77
use vortex_utils::aliases::hash_set::HashSet;
88

99
use crate::expr::Expression;
10+
use crate::expr::analysis::AnnotationFn;
11+
use crate::expr::analysis::Annotations;
12+
use crate::expr::descendent_annotations;
1013
use crate::expr::exprs::get_item::GetItem;
1114
use crate::expr::exprs::root::Root;
1215
use crate::expr::exprs::select::Select;
13-
use crate::expr::transform::annotations::AnnotationFn;
14-
use crate::expr::transform::annotations::Annotations;
15-
use crate::expr::transform::annotations::descendent_annotations;
1616

1717
pub type FieldAccesses<'a> = Annotations<'a, FieldName>;
1818

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use vortex_error::VortexExpect;
5+
use vortex_error::VortexResult;
6+
use vortex_utils::aliases::hash_map::HashMap;
7+
8+
use crate::expr::Expression;
9+
use crate::expr::traversal::NodeExt;
10+
use crate::expr::traversal::NodeVisitor;
11+
use crate::expr::traversal::TraversalOrder;
12+
13+
/// Label each node in an expression tree using a bottom-up traversal.
14+
///
15+
/// This function separates tree labeling into two distinct steps:
16+
/// 1. **Label Self**: Compute a label for each node based only on the node itself
17+
/// 2. **Merge Child**: Fold/accumulate labels from children into the node's self-label
18+
///
19+
/// The labeling process:
20+
/// - First, `self_label` is called on the node to produce its self-label
21+
/// - Then, for each child, `merge_child` is called with `(self_label, child_label)`
22+
/// to fold the child label into the self_label
23+
/// - This produces the final label for the node
24+
///
25+
/// # Parameters
26+
///
27+
/// - `expr`: The root expression to label
28+
/// - `self_label`: Function that computes a label for a single node
29+
/// - `merge_child`: Mutable function that folds child labels into an accumulator.
30+
/// Takes `(self_label, child_label)` and returns the updated accumulator.
31+
/// Called once per child, with the initial accumulator being the node's self-label.
32+
///
33+
pub fn label_tree<L: Clone>(
34+
expr: &Expression,
35+
self_label: impl Fn(&Expression) -> L,
36+
mut merge_child: impl FnMut(L, &L) -> L,
37+
) -> HashMap<&Expression, L> {
38+
let mut visitor = LabelingVisitor {
39+
labels: Default::default(),
40+
self_label,
41+
merge_child: &mut merge_child,
42+
};
43+
expr.accept(&mut visitor)
44+
.vortex_expect("LabelingVisitor is infallible");
45+
visitor.labels
46+
}
47+
48+
struct LabelingVisitor<'a, 'b, L, F, G>
49+
where
50+
F: Fn(&Expression) -> L,
51+
G: FnMut(L, &L) -> L,
52+
{
53+
labels: HashMap<&'a Expression, L>,
54+
self_label: F,
55+
merge_child: &'b mut G,
56+
}
57+
58+
impl<'a, 'b, L: Clone, F, G> NodeVisitor<'a> for LabelingVisitor<'a, 'b, L, F, G>
59+
where
60+
F: Fn(&Expression) -> L,
61+
G: FnMut(L, &L) -> L,
62+
{
63+
type NodeTy = Expression;
64+
65+
fn visit_up(&mut self, node: &'a Expression) -> VortexResult<TraversalOrder> {
66+
let self_label = (self.self_label)(node);
67+
68+
let final_label = node.children().iter().fold(self_label, |acc, child| {
69+
let child_label = self
70+
.labels
71+
.get(child)
72+
.vortex_expect("child must have label");
73+
(self.merge_child)(acc, child_label)
74+
});
75+
76+
self.labels.insert(node, final_label);
77+
78+
Ok(TraversalOrder::Continue)
79+
}
80+
}
81+
82+
#[cfg(test)]
83+
mod tests {
84+
use super::*;
85+
use crate::expr::exprs::binary::eq;
86+
use crate::expr::exprs::get_item::col;
87+
use crate::expr::exprs::literal::lit;
88+
89+
#[test]
90+
fn test_tree_depth() {
91+
// Expression: $.col1 = 5
92+
// Tree: eq(get_item(root(), "col1"), lit(5))
93+
// Depth: root = 1, get_item = 2, lit = 1, eq = 3
94+
let expr = eq(col("col1"), lit(5));
95+
let depths = label_tree(
96+
&expr,
97+
|_node| 1, // Each node has depth 1 by itself
98+
|self_depth, child_depth| self_depth.max(*child_depth + 1),
99+
);
100+
101+
// The root (eq) should have depth 3
102+
assert_eq!(depths.get(&expr), Some(&3));
103+
}
104+
105+
#[test]
106+
fn test_node_count() {
107+
// Count total nodes in subtree (including self)
108+
// Tree: eq(get_item(root(), "col1"), lit(5))
109+
// Nodes: eq, get_item, root, lit = 4
110+
let expr = eq(col("col1"), lit(5));
111+
let counts = label_tree(
112+
&expr,
113+
|_node| 1, // Each node counts as 1
114+
|self_count, child_count| self_count + *child_count,
115+
);
116+
117+
// Root should have count of 4 (eq, get_item, root, lit)
118+
assert_eq!(counts.get(&expr), Some(&4));
119+
}
120+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
pub mod annotation;
5+
pub mod immediate_access;
6+
mod labeling;
7+
mod null_sensitive;
8+
9+
pub use annotation::*;
10+
pub use immediate_access::*;
11+
pub use labeling::*;
12+
pub use null_sensitive::*;
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use vortex_utils::aliases::hash_map::HashMap;
5+
6+
use super::labeling::label_tree;
7+
use crate::expr::Expression;
8+
9+
pub type NullSensitiveLabels<'a> = HashMap<&'a Expression, bool>;
10+
11+
/// Label each expression in the tree with whether it is null-sensitive.
12+
///
13+
/// See [`crate::expr::VTable::is_null_sensitive`] for a definition of null sensitivity.
14+
/// This function operates on a tree of expressions, not just a single expression.
15+
pub fn label_null_sensitive(expr: &Expression) -> NullSensitiveLabels<'_> {
16+
label_tree(
17+
expr,
18+
|expr| expr.is_null_sensitive(),
19+
|acc, &child| acc | child,
20+
)
21+
}
22+
23+
#[cfg(test)]
24+
mod tests {
25+
use super::*;
26+
use crate::expr::exprs::binary::eq;
27+
use crate::expr::exprs::get_item::col;
28+
use crate::expr::exprs::is_null::is_null;
29+
use crate::expr::exprs::literal::lit;
30+
31+
#[test]
32+
fn test_null_sensitive_with_is_null() {
33+
let expr = is_null(col("col1"));
34+
let labels = label_null_sensitive(&expr);
35+
36+
// The root expression should be null-sensitive
37+
assert_eq!(labels.get(&expr), Some(&true));
38+
}
39+
40+
#[test]
41+
fn test_null_sensitive_without_is_null() {
42+
let expr = eq(col("col1"), lit(5));
43+
let labels = label_null_sensitive(&expr);
44+
45+
// Since the default is conservative (true), all expressions are sensitive
46+
assert_eq!(labels.get(&expr), Some(&true));
47+
}
48+
49+
#[test]
50+
fn test_null_sensitive_nested() {
51+
let left = eq(col("col1"), lit(5));
52+
let right = is_null(col("col2"));
53+
let expr = eq(left.clone(), right.clone());
54+
55+
let labels = label_null_sensitive(&expr);
56+
57+
// With conservative defaults, all are sensitive
58+
assert_eq!(labels.get(&left), Some(&true));
59+
assert_eq!(labels.get(&right), Some(&true));
60+
assert_eq!(labels.get(&expr), Some(&true));
61+
}
62+
}

vortex-array/src/expr/expression.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,12 @@ impl Expression {
232232
self.stat_expression(Stat::Max, catalog)
233233
}
234234

235+
/// Returns whether this expression itself is null-sensitive.
236+
/// See [`VTable::is_null_sensitive`].
237+
pub fn is_null_sensitive(&self) -> bool {
238+
self.vtable.as_dyn().is_null_sensitive(self.data.as_ref())
239+
}
240+
235241
/// Format the expression as a compact string.
236242
///
237243
/// Since this is a recursive formatter, it is exposed on the public Expression type.

vortex-array/src/expr/exprs/between.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,10 @@ impl VTable for Between {
151151
) -> Option<Expression> {
152152
expr.to_binary_expr().stat_falsification(catalog)
153153
}
154+
155+
fn is_null_sensitive(&self, _instance: &Self::Instance) -> bool {
156+
false
157+
}
154158
}
155159

156160
impl ExpressionView<'_, Between> {

vortex-array/src/expr/exprs/binary.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,10 @@ impl VTable for Binary {
252252
Operator::Add | Operator::Sub | Operator::Mul | Operator::Div => None,
253253
}
254254
}
255+
256+
fn is_null_sensitive(&self, _instance: &Self::Instance) -> bool {
257+
false
258+
}
255259
}
256260

257261
impl ExpressionView<'_, Binary> {

0 commit comments

Comments
 (0)