Skip to content

Commit 4d68b6d

Browse files
authored
Useeq_dyn, neq_dyn, lt_dyn, lt_eq_dyn, gt_dyn, gt_eq_dyn kernels from arrow (#1475)
* Use Dynamic Dispatch kernels in Arrow `eq_dyn` * wrap eq_dyn * Rework how kernels are wrapped * touchups * update comment
1 parent 4c0b17f commit 4d68b6d

File tree

1 file changed

+57
-15
lines changed
  • datafusion/src/physical_plan/expressions

1 file changed

+57
-15
lines changed

datafusion/src/physical_plan/expressions/binary.rs

Lines changed: 57 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,9 @@ use arrow::compute::kernels::arithmetic::{
2525
multiply_scalar, subtract, subtract_scalar,
2626
};
2727
use arrow::compute::kernels::boolean::{and_kleene, not, or_kleene};
28-
use arrow::compute::kernels::comparison::{eq, gt, gt_eq, lt, lt_eq, neq};
2928
use arrow::compute::kernels::comparison::{
30-
eq_bool, eq_bool_scalar, gt_bool, gt_bool_scalar, gt_eq_bool, gt_eq_bool_scalar,
31-
lt_bool, lt_bool_scalar, lt_eq_bool, lt_eq_bool_scalar, neq_bool, neq_bool_scalar,
29+
eq_bool_scalar, gt_bool_scalar, gt_eq_bool_scalar, lt_bool_scalar, lt_eq_bool_scalar,
30+
neq_bool_scalar,
3231
};
3332
use arrow::compute::kernels::comparison::{
3433
eq_dyn_bool_scalar, gt_dyn_bool_scalar, gt_eq_dyn_bool_scalar, lt_dyn_bool_scalar,
@@ -45,15 +44,12 @@ use arrow::compute::kernels::comparison::{
4544
use arrow::compute::kernels::comparison::{
4645
eq_scalar, gt_eq_scalar, gt_scalar, lt_eq_scalar, lt_scalar, neq_scalar,
4746
};
48-
use arrow::compute::kernels::comparison::{
49-
eq_utf8, gt_eq_utf8, gt_utf8, like_utf8, lt_eq_utf8, lt_utf8, neq_utf8, nlike_utf8,
50-
regexp_is_match_utf8,
51-
};
5247
use arrow::compute::kernels::comparison::{
5348
eq_utf8_scalar, gt_eq_utf8_scalar, gt_utf8_scalar, like_utf8_scalar,
5449
lt_eq_utf8_scalar, lt_utf8_scalar, neq_utf8_scalar, nlike_utf8_scalar,
5550
regexp_is_match_utf8_scalar,
5651
};
52+
use arrow::compute::kernels::comparison::{like_utf8, nlike_utf8, regexp_is_match_utf8};
5753
use arrow::datatypes::{ArrowNumericType, DataType, Schema, TimeUnit};
5854
use arrow::error::ArrowError::DivideByZero;
5955
use arrow::record_batch::RecordBatch;
@@ -65,6 +61,50 @@ use crate::physical_plan::{ColumnarValue, PhysicalExpr};
6561
use crate::scalar::ScalarValue;
6662
use datafusion_expr::Operator;
6763

64+
// TODO move to arrow_rs
65+
// https://github.com/apache/arrow-rs/issues/1312
66+
fn as_decimal_array(arr: &dyn Array) -> &DecimalArray {
67+
arr.as_any()
68+
.downcast_ref::<DecimalArray>()
69+
.expect("Unable to downcast to typed array to DecimalArray")
70+
}
71+
72+
/// create a `dyn_op` wrapper function for the specified operation
73+
/// that call the underlying dyn_op arrow kernel if the type is
74+
/// supported, and translates ArrowError to DataFusionError
75+
macro_rules! make_dyn_comp_op {
76+
($OP:tt) => {
77+
paste::paste! {
78+
/// wrapper over arrow compute kernel that maps Error types and
79+
/// patches missing support in arrow
80+
fn [<$OP _dyn>] (left: &dyn Array, right: &dyn Array) -> Result<ArrayRef> {
81+
match (left.data_type(), right.data_type()) {
82+
// Call `op_decimal` (e.g. `eq_decimal) until
83+
// arrow has native support
84+
// https://github.com/apache/arrow-rs/issues/1200
85+
(DataType::Decimal(_, _), DataType::Decimal(_, _)) => {
86+
[<$OP _decimal>](as_decimal_array(left), as_decimal_array(right))
87+
},
88+
// By default call the arrow kernel
89+
_ => {
90+
arrow::compute::kernels::comparison::[<$OP _dyn>](left, right)
91+
.map_err(|e| e.into())
92+
}
93+
}
94+
.map(|a| Arc::new(a) as ArrayRef)
95+
}
96+
}
97+
};
98+
}
99+
100+
// create eq_dyn, gt_dyn, wrappers etc
101+
make_dyn_comp_op!(eq);
102+
make_dyn_comp_op!(gt);
103+
make_dyn_comp_op!(gt_eq);
104+
make_dyn_comp_op!(lt);
105+
make_dyn_comp_op!(lt_eq);
106+
make_dyn_comp_op!(neq);
107+
68108
// Simple (low performance) kernels until optimized kernels are added to arrow
69109
// See https://github.com/apache/arrow-rs/issues/960
70110

@@ -91,8 +131,10 @@ fn is_not_distinct_from_bool(
91131
.collect())
92132
}
93133

94-
// TODO add iter for decimal array
95-
// TODO move this to arrow-rs
134+
// TODO move decimal kernels to to arrow-rs
135+
// https://github.com/apache/arrow-rs/issues/1200
136+
137+
// TODO use iter added for for decimal array in
96138
// https://github.com/apache/arrow-rs/issues/1083
97139
pub(super) fn eq_decimal_scalar(
98140
left: &DecimalArray,
@@ -1194,12 +1236,12 @@ impl BinaryExpr {
11941236
match &self.op {
11951237
Operator::Like => binary_string_array_op!(left, right, like),
11961238
Operator::NotLike => binary_string_array_op!(left, right, nlike),
1197-
Operator::Lt => binary_array_op!(left, right, lt),
1198-
Operator::LtEq => binary_array_op!(left, right, lt_eq),
1199-
Operator::Gt => binary_array_op!(left, right, gt),
1200-
Operator::GtEq => binary_array_op!(left, right, gt_eq),
1201-
Operator::Eq => binary_array_op!(left, right, eq),
1202-
Operator::NotEq => binary_array_op!(left, right, neq),
1239+
Operator::Lt => lt_dyn(&left, &right),
1240+
Operator::LtEq => lt_eq_dyn(&left, &right),
1241+
Operator::Gt => gt_dyn(&left, &right),
1242+
Operator::GtEq => gt_eq_dyn(&left, &right),
1243+
Operator::Eq => eq_dyn(&left, &right),
1244+
Operator::NotEq => neq_dyn(&left, &right),
12031245
Operator::IsDistinctFrom => binary_array_op!(left, right, is_distinct_from),
12041246
Operator::IsNotDistinctFrom => {
12051247
binary_array_op!(left, right, is_not_distinct_from)

0 commit comments

Comments
 (0)