Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 110 additions & 7 deletions arrow/src/compute/kernels/comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@ use crate::buffer::{bitwise_bin_op_helper, buffer_unary_not, Buffer, MutableBuff
use crate::compute::binary_boolean_kernel;
use crate::compute::util::combine_option_bitmap;
use crate::datatypes::{
ArrowNumericType, DataType, Float32Type, Float64Type, Int16Type, Int32Type,
Int64Type, Int8Type, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
ArrowNumericType, DataType, Date32Type, Date64Type, Float32Type, Float64Type,
Int16Type, Int32Type, Int64Type, Int8Type, TimeUnit, TimestampMicrosecondType,
TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType, UInt16Type,
UInt32Type, UInt64Type, UInt8Type,
};
use crate::error::{ArrowError, Result};
use crate::util::bit_util;
Expand Down Expand Up @@ -1136,6 +1138,60 @@ macro_rules! typed_compares {
(DataType::LargeUtf8, DataType::LargeUtf8) => {
typed_cmp!($LEFT, $RIGHT, LargeStringArray, $OP_STR, i64)
}
(
DataType::Timestamp(TimeUnit::Nanosecond, _),
DataType::Timestamp(TimeUnit::Nanosecond, _),
) => {
typed_cmp!(
$LEFT,
$RIGHT,
TimestampNanosecondArray,
$OP_PRIM,
TimestampNanosecondType
)
}
(
DataType::Timestamp(TimeUnit::Microsecond, _),
DataType::Timestamp(TimeUnit::Microsecond, _),
) => {
typed_cmp!(
$LEFT,
$RIGHT,
TimestampMicrosecondArray,
$OP_PRIM,
TimestampMicrosecondType
)
}
(
DataType::Timestamp(TimeUnit::Millisecond, _),
DataType::Timestamp(TimeUnit::Millisecond, _),
) => {
typed_cmp!(
$LEFT,
$RIGHT,
TimestampMillisecondArray,
$OP_PRIM,
TimestampMillisecondType
)
}
(
DataType::Timestamp(TimeUnit::Second, _),
DataType::Timestamp(TimeUnit::Second, _),
) => {
typed_cmp!(
$LEFT,
$RIGHT,
TimestampSecondArray,
$OP_PRIM,
TimestampSecondType
)
}
(DataType::Date32, DataType::Date32) => {
typed_cmp!($LEFT, $RIGHT, Date32Array, $OP_PRIM, Date32Type)
}
(DataType::Date64, DataType::Date64) => {
typed_cmp!($LEFT, $RIGHT, Date64Array, $OP_PRIM, Date64Type)
}
(t1, t2) if t1 == t2 => Err(ArrowError::NotYetImplemented(format!(
"Comparing arrays of type {} is not yet implemented",
t1
Expand Down Expand Up @@ -1478,13 +1534,14 @@ mod tests {
use crate::{array::Int32Array, array::Int64Array, datatypes::Field};

/// Evaluate `KERNEL` with two vectors as inputs and assert against the expected output.
/// `A_VEC` and `B_VEC` can be of type `Vec<i64>` or `Vec<Option<i64>>`.
/// `A_VEC` and `B_VEC` can be of type `Vec<T>` or `Vec<Option<T>>`, where `T` is the native
/// type corresponding to the Arrow array's data type.
/// `EXPECTED` can be either `Vec<bool>` or `Vec<Option<bool>>`.
/// The main reason for this macro is that inputs and outputs align nicely after `cargo fmt`.
macro_rules! cmp_i64 {
($KERNEL:ident, $DYN_KERNEL:ident, $A_VEC:expr, $B_VEC:expr, $EXPECTED:expr) => {
let a = Int64Array::from($A_VEC);
let b = Int64Array::from($B_VEC);
macro_rules! cmp_vec {
($KERNEL:ident, $DYN_KERNEL:ident, $ARRAY:ident, $A_VEC:expr, $B_VEC:expr, $EXPECTED:expr) => {
let a = $ARRAY::from($A_VEC);
let b = $ARRAY::from($B_VEC);
let c = $KERNEL(&a, &b).unwrap();
assert_eq!(BooleanArray::from($EXPECTED), c);

Expand All @@ -1496,6 +1553,16 @@ mod tests {
};
}

/// Evaluate `KERNEL` with two vectors as inputs and assert against the expected output.
/// `A_VEC` and `B_VEC` can be of type `Vec<i64>` or `Vec<Option<i64>>`.
/// `EXPECTED` can be either `Vec<bool>` or `Vec<Option<bool>>`.
/// The main reason for this macro is that inputs and outputs align nicely after `cargo fmt`.
macro_rules! cmp_i64 {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is a nice refactor

($KERNEL:ident, $DYN_KERNEL:ident, $A_VEC:expr, $B_VEC:expr, $EXPECTED:expr) => {
cmp_vec!($KERNEL, $DYN_KERNEL, Int64Array, $A_VEC, $B_VEC, $EXPECTED);
};
}

/// Evaluate `KERNEL` with one vector and one scalar as inputs and assert against the expected output.
/// `A_VEC` can be of type `Vec<i64>` or `Vec<Option<i64>>`.
/// `EXPECTED` can be either `Vec<bool>` or `Vec<Option<bool>>`.
Expand All @@ -1517,6 +1584,15 @@ mod tests {
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
vec![false, false, true, false, false, false, false, true, false, false]
);

cmp_vec!(
eq,
eq_dyn,
TimestampSecondArray,
vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
vec![false, false, true, false, false, false, false, true, false, false]
);
}

#[test]
Expand Down Expand Up @@ -1564,6 +1640,15 @@ mod tests {
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
vec![true, true, false, true, true, true, true, false, true, true]
);

cmp_vec!(
neq,
neq_dyn,
TimestampMillisecondArray,
vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
vec![true, true, false, true, true, true, true, false, true, true]
);
}

#[test]
Expand Down Expand Up @@ -1770,6 +1855,15 @@ mod tests {
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
vec![false, false, false, true, true, false, false, false, true, true]
);

cmp_vec!(
lt,
lt_dyn,
TimestampMillisecondArray,
vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
vec![false, false, false, true, true, false, false, false, true, true]
);
}

#[test]
Expand All @@ -1791,6 +1885,15 @@ mod tests {
vec![None, Some(1), None, Some(1), None, Some(3), None, Some(3),],
vec![None, None, None, Some(false), None, None, None, Some(true)]
);

cmp_vec!(
lt,
lt_dyn,
TimestampMillisecondArray,
vec![None, None, Some(1), Some(1), None, None, Some(2), Some(2),],
vec![None, Some(1), None, Some(1), None, Some(3), None, Some(3),],
vec![None, None, None, Some(false), None, None, None, Some(true)]
);
}

#[test]
Expand Down