Skip to content

Commit b5dfdbe

Browse files
authored
feat: add metadata to literal expressions (#16170) (#16315)
1 parent c76c1f0 commit b5dfdbe

File tree

94 files changed

+724
-421
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

94 files changed

+724
-421
lines changed

datafusion-cli/src/functions.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -322,7 +322,7 @@ pub struct ParquetMetadataFunc {}
322322
impl TableFunctionImpl for ParquetMetadataFunc {
323323
fn call(&self, exprs: &[Expr]) -> Result<Arc<dyn TableProvider>> {
324324
let filename = match exprs.first() {
325-
Some(Expr::Literal(ScalarValue::Utf8(Some(s)))) => s, // single quote: parquet_metadata('x.parquet')
325+
Some(Expr::Literal(ScalarValue::Utf8(Some(s)), _)) => s, // single quote: parquet_metadata('x.parquet')
326326
Some(Expr::Column(Column { name, .. })) => name, // double quote: parquet_metadata("x.parquet")
327327
_ => {
328328
return plan_err!(

datafusion-examples/examples/expr_api.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,7 @@ async fn main() -> Result<()> {
6565
let expr2 = Expr::BinaryExpr(BinaryExpr::new(
6666
Box::new(col("a")),
6767
Operator::Plus,
68-
Box::new(Expr::Literal(ScalarValue::Int32(Some(5)))),
68+
Box::new(Expr::Literal(ScalarValue::Int32(Some(5)), None)),
6969
));
7070
assert_eq!(expr, expr2);
7171

datafusion-examples/examples/optimizer_rule.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -171,7 +171,7 @@ fn is_binary_eq(binary_expr: &BinaryExpr) -> bool {
171171

172172
/// Return true if the expression is a literal or column reference
173173
fn is_lit_or_col(expr: &Expr) -> bool {
174-
matches!(expr, Expr::Column(_) | Expr::Literal(_))
174+
matches!(expr, Expr::Column(_) | Expr::Literal(_, _))
175175
}
176176

177177
/// A simple user defined filter function

datafusion-examples/examples/simple_udtf.rs

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -133,7 +133,8 @@ struct LocalCsvTableFunc {}
133133

134134
impl TableFunctionImpl for LocalCsvTableFunc {
135135
fn call(&self, exprs: &[Expr]) -> Result<Arc<dyn TableProvider>> {
136-
let Some(Expr::Literal(ScalarValue::Utf8(Some(ref path)))) = exprs.first() else {
136+
let Some(Expr::Literal(ScalarValue::Utf8(Some(ref path)), _)) = exprs.first()
137+
else {
137138
return plan_err!("read_csv requires at least one string argument");
138139
};
139140

@@ -145,7 +146,7 @@ impl TableFunctionImpl for LocalCsvTableFunc {
145146
let info = SimplifyContext::new(&execution_props);
146147
let expr = ExprSimplifier::new(info).simplify(expr.clone())?;
147148

148-
if let Expr::Literal(ScalarValue::Int64(Some(limit))) = expr {
149+
if let Expr::Literal(ScalarValue::Int64(Some(limit)), _) = expr {
149150
Ok(limit as usize)
150151
} else {
151152
plan_err!("Limit must be an integer")

datafusion/catalog-listing/src/helpers.rs

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -61,7 +61,7 @@ pub fn expr_applicable_for_cols(col_names: &[&str], expr: &Expr) -> bool {
6161
Ok(TreeNodeRecursion::Stop)
6262
}
6363
}
64-
Expr::Literal(_)
64+
Expr::Literal(_, _)
6565
| Expr::Alias(_)
6666
| Expr::OuterReferenceColumn(_, _)
6767
| Expr::ScalarVariable(_, _)
@@ -346,8 +346,8 @@ fn populate_partition_values<'a>(
346346
{
347347
match op {
348348
Operator::Eq => match (left.as_ref(), right.as_ref()) {
349-
(Expr::Column(Column { ref name, .. }), Expr::Literal(val))
350-
| (Expr::Literal(val), Expr::Column(Column { ref name, .. })) => {
349+
(Expr::Column(Column { ref name, .. }), Expr::Literal(val, _))
350+
| (Expr::Literal(val, _), Expr::Column(Column { ref name, .. })) => {
351351
if partition_values
352352
.insert(name, PartitionValue::Single(val.to_string()))
353353
.is_some()
@@ -984,7 +984,7 @@ mod tests {
984984
assert_eq!(
985985
evaluate_partition_prefix(
986986
partitions,
987-
&[col("a").eq(Expr::Literal(ScalarValue::Date32(Some(3))))],
987+
&[col("a").eq(Expr::Literal(ScalarValue::Date32(Some(3)), None))],
988988
),
989989
Some(Path::from("a=1970-01-04")),
990990
);
@@ -993,9 +993,10 @@ mod tests {
993993
assert_eq!(
994994
evaluate_partition_prefix(
995995
partitions,
996-
&[col("a").eq(Expr::Literal(ScalarValue::Date64(Some(
997-
4 * 24 * 60 * 60 * 1000
998-
)))),],
996+
&[col("a").eq(Expr::Literal(
997+
ScalarValue::Date64(Some(4 * 24 * 60 * 60 * 1000)),
998+
None
999+
)),],
9991000
),
10001001
Some(Path::from("a=1970-01-05")),
10011002
);

datafusion/core/benches/map_query_sql.rs

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -71,8 +71,11 @@ fn criterion_benchmark(c: &mut Criterion) {
7171
let mut value_buffer = Vec::new();
7272

7373
for i in 0..1000 {
74-
key_buffer.push(Expr::Literal(ScalarValue::Utf8(Some(keys[i].clone()))));
75-
value_buffer.push(Expr::Literal(ScalarValue::Int32(Some(values[i]))));
74+
key_buffer.push(Expr::Literal(
75+
ScalarValue::Utf8(Some(keys[i].clone())),
76+
None,
77+
));
78+
value_buffer.push(Expr::Literal(ScalarValue::Int32(Some(values[i])), None));
7679
}
7780
c.bench_function("map_1000_1", |b| {
7881
b.iter(|| {

datafusion/core/src/dataframe/mod.rs

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1337,7 +1337,10 @@ impl DataFrame {
13371337
/// ```
13381338
pub async fn count(self) -> Result<usize> {
13391339
let rows = self
1340-
.aggregate(vec![], vec![count(Expr::Literal(COUNT_STAR_EXPANSION))])?
1340+
.aggregate(
1341+
vec![],
1342+
vec![count(Expr::Literal(COUNT_STAR_EXPANSION, None))],
1343+
)?
13411344
.collect()
13421345
.await?;
13431346
let len = *rows

datafusion/core/src/datasource/listing/table.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2230,7 +2230,7 @@ mod tests {
22302230
let filter_predicate = Expr::BinaryExpr(BinaryExpr::new(
22312231
Box::new(Expr::Column("column1".into())),
22322232
Operator::GtEq,
2233-
Box::new(Expr::Literal(ScalarValue::Int32(Some(0)))),
2233+
Box::new(Expr::Literal(ScalarValue::Int32(Some(0)), None)),
22342234
));
22352235

22362236
// Create a new batch of data to insert into the table

datafusion/core/src/execution/context/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1214,7 +1214,7 @@ impl SessionContext {
12141214
let mut params: Vec<ScalarValue> = parameters
12151215
.into_iter()
12161216
.map(|e| match e {
1217-
Expr::Literal(scalar) => Ok(scalar),
1217+
Expr::Literal(scalar, _) => Ok(scalar),
12181218
_ => not_impl_err!("Unsupported parameter type: {}", e),
12191219
})
12201220
.collect::<Result<_>>()?;

datafusion/core/src/physical_planner.rs

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2257,7 +2257,8 @@ mod tests {
22572257
// verify that the plan correctly casts u8 to i64
22582258
// the cast from u8 to i64 for literal will be simplified, and get lit(int64(5))
22592259
// the cast here is implicit so has CastOptions with safe=true
2260-
let expected = "BinaryExpr { left: Column { name: \"c7\", index: 2 }, op: Lt, right: Literal { value: Int64(5) }, fail_on_overflow: false }";
2260+
let expected = r#"BinaryExpr { left: Column { name: "c7", index: 2 }, op: Lt, right: Literal { value: Int64(5), field: Field { name: "5", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} } }, fail_on_overflow: false }"#;
2261+
22612262
assert!(format!("{exec_plan:?}").contains(expected));
22622263
Ok(())
22632264
}
@@ -2282,7 +2283,7 @@ mod tests {
22822283
&session_state,
22832284
);
22842285

2285-
let expected = r#"Ok(PhysicalGroupBy { expr: [(Column { name: "c1", index: 0 }, "c1"), (Column { name: "c2", index: 1 }, "c2"), (Column { name: "c3", index: 2 }, "c3")], null_expr: [(Literal { value: Utf8(NULL) }, "c1"), (Literal { value: Int64(NULL) }, "c2"), (Literal { value: Int64(NULL) }, "c3")], groups: [[false, false, false], [true, false, false], [false, true, false], [false, false, true], [true, true, false], [true, false, true], [false, true, true], [true, true, true]] })"#;
2286+
let expected = r#"Ok(PhysicalGroupBy { expr: [(Column { name: "c1", index: 0 }, "c1"), (Column { name: "c2", index: 1 }, "c2"), (Column { name: "c3", index: 2 }, "c3")], null_expr: [(Literal { value: Utf8(NULL), field: Field { name: "NULL", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} } }, "c1"), (Literal { value: Int64(NULL), field: Field { name: "NULL", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} } }, "c2"), (Literal { value: Int64(NULL), field: Field { name: "NULL", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} } }, "c3")], groups: [[false, false, false], [true, false, false], [false, true, false], [false, false, true], [true, true, false], [true, false, true], [false, true, true], [true, true, true]] })"#;
22862287

22872288
assert_eq!(format!("{cube:?}"), expected);
22882289

@@ -2309,7 +2310,7 @@ mod tests {
23092310
&session_state,
23102311
);
23112312

2312-
let expected = r#"Ok(PhysicalGroupBy { expr: [(Column { name: "c1", index: 0 }, "c1"), (Column { name: "c2", index: 1 }, "c2"), (Column { name: "c3", index: 2 }, "c3")], null_expr: [(Literal { value: Utf8(NULL) }, "c1"), (Literal { value: Int64(NULL) }, "c2"), (Literal { value: Int64(NULL) }, "c3")], groups: [[true, true, true], [false, true, true], [false, false, true], [false, false, false]] })"#;
2313+
let expected = r#"Ok(PhysicalGroupBy { expr: [(Column { name: "c1", index: 0 }, "c1"), (Column { name: "c2", index: 1 }, "c2"), (Column { name: "c3", index: 2 }, "c3")], null_expr: [(Literal { value: Utf8(NULL), field: Field { name: "NULL", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} } }, "c1"), (Literal { value: Int64(NULL), field: Field { name: "NULL", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} } }, "c2"), (Literal { value: Int64(NULL), field: Field { name: "NULL", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} } }, "c3")], groups: [[true, true, true], [false, true, true], [false, false, true], [false, false, false]] })"#;
23132314

23142315
assert_eq!(format!("{rollup:?}"), expected);
23152316

@@ -2493,7 +2494,7 @@ mod tests {
24932494
let execution_plan = plan(&logical_plan).await?;
24942495
// verify that the plan correctly adds cast from Int64(1) to Utf8, and the const will be evaluated.
24952496

2496-
let expected = "expr: [(BinaryExpr { left: BinaryExpr { left: Column { name: \"c1\", index: 0 }, op: Eq, right: Literal { value: Utf8(\"a\") }, fail_on_overflow: false }, op: Or, right: BinaryExpr { left: Column { name: \"c1\", index: 0 }, op: Eq, right: Literal { value: Utf8(\"1\") }, fail_on_overflow: false }, fail_on_overflow: false }";
2497+
let expected = "expr: [(BinaryExpr { left: BinaryExpr { left: Column { name: \"c1\", index: 0 }, op: Eq, right: Literal { value: Utf8(\"a\"), field: Field { name: \"a\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} } }, fail_on_overflow: false }, op: Or, right: BinaryExpr { left: Column { name: \"c1\", index: 0 }, op: Eq, right: Literal { value: Utf8(\"1\"), field: Field { name: \"1\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} } }, fail_on_overflow: false }, fail_on_overflow: false }";
24972498

24982499
let actual = format!("{execution_plan:?}");
24992500
assert!(actual.contains(expected), "{}", actual);

0 commit comments

Comments
 (0)