Skip to content

Commit efcdd4a

Browse files
author
Jiayu Liu
committed
add docs and comments
1 parent b44238d commit efcdd4a

File tree

2 files changed

+38
-8
lines changed

2 files changed

+38
-8
lines changed

datafusion/src/physical_plan/functions.rs

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,14 @@ use crate::{
4343
scalar::ScalarValue,
4444
};
4545
use arrow::{
46-
array::ArrayRef,
46+
array::{ArrayRef, NullArray},
4747
compute::kernels::length::{bit_length, length},
4848
datatypes::TimeUnit,
4949
datatypes::{DataType, Field, Int32Type, Int64Type, Schema},
5050
record_batch::RecordBatch,
5151
};
5252
use fmt::{Debug, Formatter};
53+
use std::convert::From;
5354
use std::{any::Any, fmt, str::FromStr, sync::Arc};
5455

5556
/// A function's signature, which defines the function's supported argument types.
@@ -75,6 +76,13 @@ pub enum Signature {
7576
}
7677

7778
/// Scalar function
79+
///
80+
/// The Fn param is the wrapped function but be aware that the function will
81+
/// be passed with the slice / vec of columnar values (either scalar or array)
82+
/// with the exception of zero param function, where a singular element vec
83+
/// will be passed. In that case the single element is a null array to indicate
84+
/// the batch's row count (so that the generative zero-argument function can know
85+
/// the result array size).
7886
pub type ScalarFunctionImplementation =
7987
Arc<dyn Fn(&[ColumnarValue]) -> Result<ColumnarValue> + Send + Sync>;
8088

@@ -1358,6 +1366,17 @@ impl fmt::Display for ScalarFunctionExpr {
13581366
}
13591367
}
13601368

1369+
/// null columnar values are implemented as a null array in order to pass batch
1370+
/// num_rows
1371+
type NullColumnarValue = ColumnarValue;
1372+
1373+
impl From<&RecordBatch> for NullColumnarValue {
1374+
fn from(batch: &RecordBatch) -> Self {
1375+
let num_rows = batch.num_rows();
1376+
ColumnarValue::Array(Arc::new(NullArray::new(num_rows)))
1377+
}
1378+
}
1379+
13611380
impl PhysicalExpr for ScalarFunctionExpr {
13621381
/// Return a reference to Any that can be used for downcasting
13631382
fn as_any(&self) -> &dyn Any {
@@ -1373,20 +1392,24 @@ impl PhysicalExpr for ScalarFunctionExpr {
13731392
}
13741393

13751394
fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
1376-
// evaluate the arguments
1377-
let inputs = self
1378-
.args
1379-
.iter()
1380-
.map(|e| e.evaluate(batch))
1381-
.collect::<Result<Vec<_>>>()?;
1395+
// evaluate the arguments, if there are no arguments we'll instead pass in a null array
1396+
// indicating the batch size (as a convention)
1397+
let inputs = match self.args.len() {
1398+
0 => vec![NullColumnarValue::from(batch)],
1399+
_ => self
1400+
.args
1401+
.iter()
1402+
.map(|e| e.evaluate(batch))
1403+
.collect::<Result<Vec<_>>>()?,
1404+
};
13821405

13831406
// evaluate the function
13841407
let fun = self.fun.as_ref();
13851408
(fun)(&inputs)
13861409
}
13871410
}
13881411

1389-
/// decorates a function to handle [`ScalarValue`]s by coverting them to arrays before calling the function
1412+
/// decorates a function to handle [`ScalarValue`]s by converting them to arrays before calling the function
13901413
/// and vice-versa after evaluation.
13911414
pub fn make_scalar_function<F>(inner: F) -> ScalarFunctionImplementation
13921415
where

datafusion/src/physical_plan/udf.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,13 @@ pub struct ScalarUDF {
4343
/// Return type
4444
pub return_type: ReturnTypeFunction,
4545
/// actual implementation
46+
///
47+
/// The fn param is the wrapped function but be aware that the function will
48+
/// be passed with the slice / vec of columnar values (either scalar or array)
49+
/// with the exception of zero param function, where a singular element vec
50+
/// will be passed. In that case the single element is a null array to indicate
51+
/// the batch's row count (so that the generative zero-argument function can know
52+
/// the result array size).
4653
pub fun: ScalarFunctionImplementation,
4754
}
4855

0 commit comments

Comments
 (0)