Skip to content

Commit ad0abe9

Browse files
move the Log, Power functions to datafusion-functions (#9983)
* move the Log, Power functions to datafusion-functions
* match type instead of name
* fix formatting errors
1 parent 1c4c002 commit ad0abe9

File tree

17 files changed

+641
-407
lines changed

17 files changed

+641
-407
lines changed

datafusion/core/tests/simplification.rs

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ use datafusion_common::cast::as_int32_array;
2525
use datafusion_common::ScalarValue;
2626
use datafusion_common::{DFSchemaRef, ToDFSchema};
2727
use datafusion_expr::expr::ScalarFunction;
28+
use datafusion_expr::logical_plan::builder::table_scan_with_filters;
2829
use datafusion_expr::simplify::SimplifyInfo;
2930
use datafusion_expr::{
3031
expr, table_scan, BuiltinScalarFunction, Cast, ColumnarValue, Expr, ExprSchemable,
@@ -294,6 +295,45 @@ fn select_date_plus_interval() -> Result<()> {
294295
Ok(())
295296
}
296297

298+
#[test]
299+
fn simplify_project_scalar_fn() -> Result<()> {
300+
// Issue https://github.com/apache/arrow-datafusion/issues/5996
301+
let schema = Schema::new(vec![Field::new("f", DataType::Float64, false)]);
302+
let plan = table_scan(Some("test"), &schema, None)?
303+
.project(vec![power(col("f"), lit(1.0))])?
304+
.build()?;
305+
306+
// before simplify: power(t.f, 1.0)
307+
// after simplify: t.f as "power(t.f, 1.0)"
308+
let expected = "Projection: test.f AS power(test.f,Float64(1))\
309+
\n TableScan: test";
310+
let actual = get_optimized_plan_formatted(&plan, &Utc::now());
311+
assert_eq!(expected, actual);
312+
Ok(())
313+
}
314+
315+
#[test]
316+
fn simplify_scan_predicate() -> Result<()> {
317+
let schema = Schema::new(vec![
318+
Field::new("f", DataType::Float64, false),
319+
Field::new("g", DataType::Float64, false),
320+
]);
321+
let plan = table_scan_with_filters(
322+
Some("test"),
323+
&schema,
324+
None,
325+
vec![col("g").eq(power(col("f"), lit(1.0)))],
326+
)?
327+
.build()?;
328+
329+
// before simplify: t.g = power(t.f, 1.0)
330+
// after simplify: (t.g = t.f) as "t.g = power(t.f, 1.0)"
331+
let expected = "TableScan: test, full_filters=[g = f AS g = power(f,Float64(1))]";
332+
let actual = get_optimized_plan_formatted(&plan, &Utc::now());
333+
assert_eq!(expected, actual);
334+
Ok(())
335+
}
336+
297337
#[test]
298338
fn test_const_evaluator() {
299339
// true --> true
@@ -431,3 +471,99 @@ fn multiple_now() -> Result<()> {
431471
assert_eq!(expected, actual);
432472
Ok(())
433473
}
474+
475+
// ------------------------------
476+
// --- Simplifier tests -----
477+
// ------------------------------
478+
479+
fn expr_test_schema() -> DFSchemaRef {
480+
Schema::new(vec![
481+
Field::new("c1", DataType::Utf8, true),
482+
Field::new("c2", DataType::Boolean, true),
483+
Field::new("c3", DataType::Int64, true),
484+
Field::new("c4", DataType::UInt32, true),
485+
Field::new("c1_non_null", DataType::Utf8, false),
486+
Field::new("c2_non_null", DataType::Boolean, false),
487+
Field::new("c3_non_null", DataType::Int64, false),
488+
Field::new("c4_non_null", DataType::UInt32, false),
489+
])
490+
.to_dfschema_ref()
491+
.unwrap()
492+
}
493+
494+
fn test_simplify(input_expr: Expr, expected_expr: Expr) {
495+
let info: MyInfo = MyInfo {
496+
schema: expr_test_schema(),
497+
execution_props: ExecutionProps::new(),
498+
};
499+
let simplifier = ExprSimplifier::new(info);
500+
let simplified_expr = simplifier
501+
.simplify(input_expr.clone())
502+
.expect("successfully evaluated");
503+
504+
assert_eq!(
505+
simplified_expr, expected_expr,
506+
"Mismatch evaluating {input_expr}\n Expected:{expected_expr}\n Got:{simplified_expr}"
507+
);
508+
}
509+
510+
#[test]
511+
fn test_simplify_log() {
512+
// Log(c3, 1) ===> 0
513+
{
514+
let expr = log(col("c3_non_null"), lit(1));
515+
test_simplify(expr, lit(0i64));
516+
}
517+
// Log(c3, c3) ===> 1
518+
{
519+
let expr = log(col("c3_non_null"), col("c3_non_null"));
520+
let expected = lit(1i64);
521+
test_simplify(expr, expected);
522+
}
523+
// Log(c3, Power(c3, c4)) ===> c4
524+
{
525+
let expr = log(
526+
col("c3_non_null"),
527+
power(col("c3_non_null"), col("c4_non_null")),
528+
);
529+
let expected = col("c4_non_null");
530+
test_simplify(expr, expected);
531+
}
532+
// Log(c3, c4) ===> Log(c3, c4)
533+
{
534+
let expr = log(col("c3_non_null"), col("c4_non_null"));
535+
let expected = log(col("c3_non_null"), col("c4_non_null"));
536+
test_simplify(expr, expected);
537+
}
538+
}
539+
540+
#[test]
541+
fn test_simplify_power() {
542+
// Power(c3, 0) ===> 1
543+
{
544+
let expr = power(col("c3_non_null"), lit(0));
545+
let expected = lit(1i64);
546+
test_simplify(expr, expected)
547+
}
548+
// Power(c3, 1) ===> c3
549+
{
550+
let expr = power(col("c3_non_null"), lit(1));
551+
let expected = col("c3_non_null");
552+
test_simplify(expr, expected)
553+
}
554+
// Power(c3, Log(c3, c4)) ===> c4
555+
{
556+
let expr = power(
557+
col("c3_non_null"),
558+
log(col("c3_non_null"), col("c4_non_null")),
559+
);
560+
let expected = col("c4_non_null");
561+
test_simplify(expr, expected)
562+
}
563+
// Power(c3, c4) ===> Power(c3, c4)
564+
{
565+
let expr = power(col("c3_non_null"), col("c4_non_null"));
566+
let expected = power(col("c3_non_null"), col("c4_non_null"));
567+
test_simplify(expr, expected)
568+
}
569+
}

datafusion/expr/src/built_in_function.rs

Lines changed: 0 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,8 @@ pub enum BuiltinScalarFunction {
4747
Factorial,
4848
/// iszero
4949
Iszero,
50-
/// log, same as log10
51-
Log,
5250
/// nanvl
5351
Nanvl,
54-
/// power
55-
Power,
5652
/// round
5753
Round,
5854
/// trunc
@@ -128,9 +124,7 @@ impl BuiltinScalarFunction {
128124
BuiltinScalarFunction::Exp => Volatility::Immutable,
129125
BuiltinScalarFunction::Factorial => Volatility::Immutable,
130126
BuiltinScalarFunction::Iszero => Volatility::Immutable,
131-
BuiltinScalarFunction::Log => Volatility::Immutable,
132127
BuiltinScalarFunction::Nanvl => Volatility::Immutable,
133-
BuiltinScalarFunction::Power => Volatility::Immutable,
134128
BuiltinScalarFunction::Round => Volatility::Immutable,
135129
BuiltinScalarFunction::Cot => Volatility::Immutable,
136130
BuiltinScalarFunction::Trunc => Volatility::Immutable,
@@ -176,16 +170,6 @@ impl BuiltinScalarFunction {
176170

177171
BuiltinScalarFunction::Factorial => Ok(Int64),
178172

179-
BuiltinScalarFunction::Power => match &input_expr_types[0] {
180-
Int64 => Ok(Int64),
181-
_ => Ok(Float64),
182-
},
183-
184-
BuiltinScalarFunction::Log => match &input_expr_types[0] {
185-
Float32 => Ok(Float32),
186-
_ => Ok(Float64),
187-
},
188-
189173
BuiltinScalarFunction::Nanvl => match &input_expr_types[0] {
190174
Float32 => Ok(Float32),
191175
_ => Ok(Float64),
@@ -233,10 +217,6 @@ impl BuiltinScalarFunction {
233217
self.volatility(),
234218
),
235219
BuiltinScalarFunction::Random => Signature::exact(vec![], self.volatility()),
236-
BuiltinScalarFunction::Power => Signature::one_of(
237-
vec![Exact(vec![Int64, Int64]), Exact(vec![Float64, Float64])],
238-
self.volatility(),
239-
),
240220
BuiltinScalarFunction::Round => Signature::one_of(
241221
vec![
242222
Exact(vec![Float64, Int64]),
@@ -255,16 +235,6 @@ impl BuiltinScalarFunction {
255235
],
256236
self.volatility(),
257237
),
258-
259-
BuiltinScalarFunction::Log => Signature::one_of(
260-
vec![
261-
Exact(vec![Float32]),
262-
Exact(vec![Float64]),
263-
Exact(vec![Float32, Float32]),
264-
Exact(vec![Float64, Float64]),
265-
],
266-
self.volatility(),
267-
),
268238
BuiltinScalarFunction::Nanvl => Signature::one_of(
269239
vec![Exact(vec![Float32, Float32]), Exact(vec![Float64, Float64])],
270240
self.volatility(),
@@ -302,8 +272,6 @@ impl BuiltinScalarFunction {
302272
| BuiltinScalarFunction::Trunc
303273
) {
304274
Some(vec![Some(true)])
305-
} else if *self == BuiltinScalarFunction::Log {
306-
Some(vec![Some(true), Some(false)])
307275
} else {
308276
None
309277
}
@@ -317,9 +285,7 @@ impl BuiltinScalarFunction {
317285
BuiltinScalarFunction::Exp => &["exp"],
318286
BuiltinScalarFunction::Factorial => &["factorial"],
319287
BuiltinScalarFunction::Iszero => &["iszero"],
320-
BuiltinScalarFunction::Log => &["log"],
321288
BuiltinScalarFunction::Nanvl => &["nanvl"],
322-
BuiltinScalarFunction::Power => &["power", "pow"],
323289
BuiltinScalarFunction::Random => &["random"],
324290
BuiltinScalarFunction::Round => &["round"],
325291
BuiltinScalarFunction::Trunc => &["trunc"],

datafusion/expr/src/expr_fn.rs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -546,9 +546,6 @@ nary_scalar_expr!(
546546
);
547547
scalar_expr!(Exp, exp, num, "exponential");
548548

549-
scalar_expr!(Power, power, base exponent, "`base` raised to the power of `exponent`");
550-
scalar_expr!(Log, log, base x, "logarithm of a `x` for a particular `base`");
551-
552549
scalar_expr!(InitCap, initcap, string, "converts the first letter of each word in `string` in uppercase and the remaining characters in lowercase");
553550
scalar_expr!(EndsWith, ends_with, string suffix, "whether the `string` ends with the `suffix`");
554551
nary_scalar_expr!(Coalesce, coalesce, "returns `coalesce(args...)`, which evaluates to the value of the first [Expr] which is not NULL");

datafusion/functions/src/macros.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,19 @@ macro_rules! make_math_binary_udf {
357357
};
358358
}
359359

360+
macro_rules! make_function_scalar_inputs {
361+
($ARG: expr, $NAME:expr, $ARRAY_TYPE:ident, $FUNC: block) => {{
362+
let arg = downcast_arg!($ARG, $NAME, $ARRAY_TYPE);
363+
364+
arg.iter()
365+
.map(|a| match a {
366+
Some(a) => Some($FUNC(a)),
367+
_ => None,
368+
})
369+
.collect::<$ARRAY_TYPE>()
370+
}};
371+
}
372+
360373
macro_rules! make_function_inputs2 {
361374
($ARG1: expr, $ARG2: expr, $NAME1:expr, $NAME2: expr, $ARRAY_TYPE:ident, $FUNC: block) => {{
362375
let arg1 = downcast_arg!($ARG1, $NAME1, $ARRAY_TYPE);

0 commit comments

Comments
 (0)