Skip to content

Commit 96a9a54

Browse files
authored
Add comparison support for fully qualified BinaryArray (#1195)
* add eq_dyn for BinaryArray
  Signed-off-by: remzi <13716567376yh@gmail.com>
* correct the code formatting
  Signed-off-by: remzi <13716567376yh@gmail.com>
* add comparison support for fully qualified binary array
  delete dyn comparison which will be added in successive PRs
  Signed-off-by: remzi <13716567376yh@gmail.com>
* add tests for comparison of fully qualified BinaryArray
  Signed-off-by: remzi <13716567376yh@gmail.com>
* add 2 missed tests
  Signed-off-by: remzi <13716567376yh@gmail.com>
* move 2 functions
  Signed-off-by: remzi <13716567376yh@gmail.com>
* fix reference error
  Signed-off-by: remzi <13716567376yh@gmail.com>
1 parent 799330b commit 96a9a54

File tree

1 file changed

+267
-0
lines changed

1 file changed

+267
-0
lines changed

arrow/src/compute/kernels/comparison.rs

Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -810,6 +810,102 @@ pub fn neq_bool_scalar(left: &BooleanArray, right: bool) -> Result<BooleanArray>
810810
eq_bool_scalar(left, !right)
811811
}
812812

813+
/// Perform `left == right` operation on [`BinaryArray`] / [`LargeBinaryArray`].
///
/// Both operands must use the same `OffsetSize`. The element-wise work is
/// delegated to the `compare_op!` macro with the closure `|a, b| a == b`.
///
/// NOTE(review): how nulls and mismatched lengths are reported is decided
/// inside `compare_op!`, which is defined outside this excerpt — confirm there.
814+
pub fn eq_binary<OffsetSize: BinaryOffsetSizeTrait>(
815+
left: &GenericBinaryArray<OffsetSize>,
816+
right: &GenericBinaryArray<OffsetSize>,
817+
) -> Result<BooleanArray> {
818+
compare_op!(left, right, |a, b| a == b)
819+
}
820+
821+
/// Perform `left == right` operation on [`BinaryArray`] / [`LargeBinaryArray`] and a scalar.
///
/// `right` is a single byte slice that is handed, together with `left`, to the
/// `compare_op_scalar!` macro along with the closure `|a, b| a == b`.
///
/// NOTE(review): null propagation is decided inside `compare_op_scalar!`, which
/// is defined outside this excerpt; the slice test in this file expects a null
/// input slot to stay null — confirm against the macro.
822+
pub fn eq_binary_scalar<OffsetSize: BinaryOffsetSizeTrait>(
823+
left: &GenericBinaryArray<OffsetSize>,
824+
right: &[u8],
825+
) -> Result<BooleanArray> {
826+
compare_op_scalar!(left, right, |a, b| a == b)
827+
}
828+
829+
/// Perform `left != right` operation on [`BinaryArray`] / [`LargeBinaryArray`].
///
/// Both operands must use the same `OffsetSize`. The element-wise work is
/// delegated to the `compare_op!` macro with the closure `|a, b| a != b`.
///
/// NOTE(review): how nulls and mismatched lengths are reported is decided
/// inside `compare_op!`, which is defined outside this excerpt — confirm there.
830+
pub fn neq_binary<OffsetSize: BinaryOffsetSizeTrait>(
831+
left: &GenericBinaryArray<OffsetSize>,
832+
right: &GenericBinaryArray<OffsetSize>,
833+
) -> Result<BooleanArray> {
834+
compare_op!(left, right, |a, b| a != b)
835+
}
836+
837+
/// Perform `left != right` operation on [`BinaryArray`] / [`LargeBinaryArray`] and a scalar.
///
/// `right` is a single byte slice that is handed, together with `left`, to the
/// `compare_op_scalar!` macro along with the closure `|a, b| a != b`.
///
/// NOTE(review): null propagation is decided inside `compare_op_scalar!`, which
/// is defined outside this excerpt — confirm there.
838+
pub fn neq_binary_scalar<OffsetSize: BinaryOffsetSizeTrait>(
839+
left: &GenericBinaryArray<OffsetSize>,
840+
right: &[u8],
841+
) -> Result<BooleanArray> {
842+
compare_op_scalar!(left, right, |a, b| a != b)
843+
}
844+
845+
/// Perform `left < right` operation on [`BinaryArray`] / [`LargeBinaryArray`].
///
/// Both operands must use the same `OffsetSize`. The element-wise work is
/// delegated to the `compare_op!` macro with the closure `|a, b| a < b`.
///
/// NOTE(review): the ordering is whatever `<` means for the values the macro
/// passes to the closure — presumably lexicographic byte order on `&[u8]`;
/// confirm the value type and the null/length handling in `compare_op!`.
846+
pub fn lt_binary<OffsetSize: BinaryOffsetSizeTrait>(
847+
left: &GenericBinaryArray<OffsetSize>,
848+
right: &GenericBinaryArray<OffsetSize>,
849+
) -> Result<BooleanArray> {
850+
compare_op!(left, right, |a, b| a < b)
851+
}
852+
853+
/// Perform `left < right` operation on [`BinaryArray`] / [`LargeBinaryArray`] and a scalar.
///
/// `right` is a single byte slice that is handed, together with `left`, to the
/// `compare_op_scalar!` macro along with the closure `|a, b| a < b`.
///
/// NOTE(review): the ordering is whatever `<` means for the values the macro
/// passes to the closure — presumably lexicographic byte order on `&[u8]`;
/// confirm the value type and the null handling in `compare_op_scalar!`.
854+
pub fn lt_binary_scalar<OffsetSize: BinaryOffsetSizeTrait>(
855+
left: &GenericBinaryArray<OffsetSize>,
856+
right: &[u8],
857+
) -> Result<BooleanArray> {
858+
compare_op_scalar!(left, right, |a, b| a < b)
859+
}
860+
861+
/// Perform `left <= right` operation on [`BinaryArray`] / [`LargeBinaryArray`].
///
/// Both operands must use the same `OffsetSize`. The element-wise work is
/// delegated to the `compare_op!` macro with the closure `|a, b| a <= b`.
///
/// NOTE(review): the ordering is whatever `<=` means for the values the macro
/// passes to the closure — presumably lexicographic byte order on `&[u8]`;
/// confirm the value type and the null/length handling in `compare_op!`.
862+
pub fn lt_eq_binary<OffsetSize: BinaryOffsetSizeTrait>(
863+
left: &GenericBinaryArray<OffsetSize>,
864+
right: &GenericBinaryArray<OffsetSize>,
865+
) -> Result<BooleanArray> {
866+
compare_op!(left, right, |a, b| a <= b)
867+
}
868+
869+
/// Perform `left <= right` operation on [`BinaryArray`] / [`LargeBinaryArray`] and a scalar.
///
/// `right` is a single byte slice that is handed, together with `left`, to the
/// `compare_op_scalar!` macro along with the closure `|a, b| a <= b`.
///
/// NOTE(review): the ordering is whatever `<=` means for the values the macro
/// passes to the closure — presumably lexicographic byte order on `&[u8]`;
/// confirm the value type and the null handling in `compare_op_scalar!`.
870+
pub fn lt_eq_binary_scalar<OffsetSize: BinaryOffsetSizeTrait>(
871+
left: &GenericBinaryArray<OffsetSize>,
872+
right: &[u8],
873+
) -> Result<BooleanArray> {
874+
compare_op_scalar!(left, right, |a, b| a <= b)
875+
}
876+
877+
/// Perform `left > right` operation on [`BinaryArray`] / [`LargeBinaryArray`].
///
/// Both operands must use the same `OffsetSize`. The element-wise work is
/// delegated to the `compare_op!` macro with the closure `|a, b| a > b`.
///
/// NOTE(review): the ordering is whatever `>` means for the values the macro
/// passes to the closure — presumably lexicographic byte order on `&[u8]`;
/// confirm the value type and the null/length handling in `compare_op!`.
878+
pub fn gt_binary<OffsetSize: BinaryOffsetSizeTrait>(
879+
left: &GenericBinaryArray<OffsetSize>,
880+
right: &GenericBinaryArray<OffsetSize>,
881+
) -> Result<BooleanArray> {
882+
compare_op!(left, right, |a, b| a > b)
883+
}
884+
885+
/// Perform `left > right` operation on [`BinaryArray`] / [`LargeBinaryArray`] and a scalar.
///
/// `right` is a single byte slice that is handed, together with `left`, to the
/// `compare_op_scalar!` macro along with the closure `|a, b| a > b`.
///
/// NOTE(review): the ordering is whatever `>` means for the values the macro
/// passes to the closure — presumably lexicographic byte order on `&[u8]`;
/// confirm the value type and the null handling in `compare_op_scalar!`.
886+
pub fn gt_binary_scalar<OffsetSize: BinaryOffsetSizeTrait>(
887+
left: &GenericBinaryArray<OffsetSize>,
888+
right: &[u8],
889+
) -> Result<BooleanArray> {
890+
compare_op_scalar!(left, right, |a, b| a > b)
891+
}
892+
893+
/// Perform `left >= right` operation on [`BinaryArray`] / [`LargeBinaryArray`].
///
/// Both operands must use the same `OffsetSize`. The element-wise work is
/// delegated to the `compare_op!` macro with the closure `|a, b| a >= b`.
///
/// NOTE(review): the ordering is whatever `>=` means for the values the macro
/// passes to the closure — presumably lexicographic byte order on `&[u8]`;
/// confirm the value type and the null/length handling in `compare_op!`.
894+
pub fn gt_eq_binary<OffsetSize: BinaryOffsetSizeTrait>(
895+
left: &GenericBinaryArray<OffsetSize>,
896+
right: &GenericBinaryArray<OffsetSize>,
897+
) -> Result<BooleanArray> {
898+
compare_op!(left, right, |a, b| a >= b)
899+
}
900+
901+
/// Perform `left >= right` operation on [`BinaryArray`] / [`LargeBinaryArray`] and a scalar.
///
/// `right` is a single byte slice that is handed, together with `left`, to the
/// `compare_op_scalar!` macro along with the closure `|a, b| a >= b`.
///
/// NOTE(review): the ordering is whatever `>=` means for the values the macro
/// passes to the closure — presumably lexicographic byte order on `&[u8]`;
/// confirm the value type and the null handling in `compare_op_scalar!`.
902+
pub fn gt_eq_binary_scalar<OffsetSize: BinaryOffsetSizeTrait>(
903+
left: &GenericBinaryArray<OffsetSize>,
904+
right: &[u8],
905+
) -> Result<BooleanArray> {
906+
compare_op_scalar!(left, right, |a, b| a >= b)
907+
}
908+
813909
/// Perform `left != right` operation on [`StringArray`] / [`LargeStringArray`].
814910
pub fn neq_utf8<OffsetSize: StringOffsetSizeTrait>(
815911
left: &GenericStringArray<OffsetSize>,
@@ -2794,6 +2890,177 @@ mod tests {
27942890
);
27952891
}
27962892

2893+
// Generates a `#[test]` function named `$test_name` for an array-vs-array kernel.
//
// `$left` and `$right` are passed to `from_vec`, `$op` is called as
// `$op(&left, &right)`, and `$expected` is indexed and compared slot by slot
// against the kernel output. The same check runs twice: once with
// `BinaryArray` inputs and once with `LargeBinaryArray` inputs, so `$left`,
// `$right` and `$expected` are each expanded (and evaluated) twice.
macro_rules! test_binary {
2894+
($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
2895+
#[test]
2896+
fn $test_name() {
2897+
// First pass: `BinaryArray` operands.
let left = BinaryArray::from_vec($left);
2898+
let right = BinaryArray::from_vec($right);
2899+
let res = $op(&left, &right).unwrap();
2900+
let expected = $expected;
2901+
// Length is checked first so the indexed loop below cannot silently
// skip trailing expected values.
assert_eq!(expected.len(), res.len());
2902+
for i in 0..res.len() {
2903+
// Only the value of each slot is inspected here, not its validity.
let v = res.value(i);
2904+
assert_eq!(v, expected[i]);
2905+
}
2906+
2907+
// Second pass: identical check with `LargeBinaryArray` operands.
let left = LargeBinaryArray::from_vec($left);
2908+
let right = LargeBinaryArray::from_vec($right);
2909+
let res = $op(&left, &right).unwrap();
2910+
let expected = $expected;
2911+
assert_eq!(expected.len(), res.len());
2912+
for i in 0..res.len() {
2913+
let v = res.value(i);
2914+
assert_eq!(v, expected[i]);
2915+
}
2916+
}
2917+
};
2918+
}
2919+
2920+
// Checks `eq_binary_scalar` on a sliced array that contains a null.
//
// The input holds four slots (`hi`, null, `hello`, `world`); after
// `slice(1, 3)` the expected output has three slots: null, true, false.
// NOTE(review): this reads `slice` as (offset, length) — confirm against the
// array API, which is outside this excerpt.
#[test]
2921+
fn test_binary_eq_scalar_on_slice() {
2922+
let a = BinaryArray::from_opt_vec(
2923+
vec![Some(b"hi"), None, Some(b"hello"), Some(b"world")],
2924+
);
2925+
let a = a.slice(1, 3);
2926+
// The sliced value is converted back to a typed binary array reference
// before being handed to the kernel.
let a = as_generic_binary_array::<i32>(&a);
2927+
let a_eq = eq_binary_scalar(a, b"hello").unwrap();
2928+
// The whole `BooleanArray` is compared, so the null slot must stay null.
assert_eq!(
2929+
a_eq,
2930+
BooleanArray::from(vec![None, Some(true), Some(false)])
2931+
);
2932+
}
2933+
2934+
// Generates a `#[test]` function named `$test_name` for an array-vs-scalar kernel.
//
// `$left` is passed to `from_vec`, `$op` is called as `$op(&left, $right)`,
// and `$expected` is indexed and compared slot by slot against the kernel
// output. The same check runs twice: once with a `BinaryArray` input and once
// with a `LargeBinaryArray` input, so `$left`, `$right` and `$expected` are
// each expanded (and evaluated) more than once.
macro_rules! test_binary_scalar {
2935+
($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
2936+
#[test]
2937+
fn $test_name() {
2938+
// First pass: `BinaryArray` input.
let left = BinaryArray::from_vec($left);
2939+
let res = $op(&left, $right).unwrap();
2940+
let expected = $expected;
2941+
// Length is checked first so the indexed loop below cannot silently
// skip trailing expected values.
assert_eq!(expected.len(), res.len());
2942+
for i in 0..res.len() {
2943+
// Only the value of each slot is inspected here, not its validity.
let v = res.value(i);
2944+
// The failure message names the offending element, its index and
// the scalar to make a mismatch easy to locate.
assert_eq!(
2945+
v,
2946+
expected[i],
2947+
"unexpected result when comparing {:?} at position {} to {:?} ",
2948+
left.value(i),
2949+
i,
2950+
$right
2951+
);
2952+
}
2953+
2954+
// Second pass: identical check with a `LargeBinaryArray` input.
let left = LargeBinaryArray::from_vec($left);
2955+
let res = $op(&left, $right).unwrap();
2956+
let expected = $expected;
2957+
assert_eq!(expected.len(), res.len());
2958+
for i in 0..res.len() {
2959+
let v = res.value(i);
2960+
assert_eq!(
2961+
v,
2962+
expected[i],
2963+
"unexpected result when comparing {:?} at position {} to {:?} ",
2964+
left.value(i),
2965+
i,
2966+
$right
2967+
);
2968+
}
2969+
}
2970+
};
2971+
}
2972+
2973+
// Equality, array vs array: only the first pair ("arrow", "arrow") matches.
test_binary!(
2974+
test_binary_array_eq,
2975+
vec![b"arrow", b"arrow", b"arrow", b"arrow"],
2976+
vec![b"arrow", b"parquet", b"datafusion", b"flight"],
2977+
eq_binary,
2978+
vec![true, false, false, false]
2979+
);
2980+
2981+
// Equality, array vs the scalar "arrow": only the first element matches.
test_binary_scalar!(
2982+
test_binary_array_eq_scalar,
2983+
vec![b"arrow", b"parquet", b"datafusion", b"flight"],
2984+
"arrow".as_bytes(),
2985+
eq_binary_scalar,
2986+
vec![true, false, false, false]
2987+
);
2988+
2989+
// Inequality, array vs array: every pair differs except the first.
test_binary!(
2990+
test_binary_array_neq,
2991+
vec![b"arrow", b"arrow", b"arrow", b"arrow"],
2992+
vec![b"arrow", b"parquet", b"datafusion", b"flight"],
2993+
neq_binary,
2994+
vec![false, true, true, true]
2995+
);
2996+
// Inequality, array vs the scalar "arrow": every element differs except the first.
test_binary_scalar!(
2997+
test_binary_array_neq_scalar,
2998+
vec![b"arrow", b"parquet", b"datafusion", b"flight"],
2999+
"arrow".as_bytes(),
3000+
neq_binary_scalar,
3001+
vec![false, true, true, true]
3002+
);
3003+
3004+
// Less-than, array vs array: the left values are compared against "flight";
// the equal pair ("flight", "flight") is expected to be false.
test_binary!(
3005+
test_binary_array_lt,
3006+
vec![b"arrow", b"datafusion", b"flight", b"parquet"],
3007+
vec![b"flight", b"flight", b"flight", b"flight"],
3008+
lt_binary,
3009+
vec![true, true, false, false]
3010+
);
3011+
// Less-than, array vs the scalar "flight": same inputs and expectations as above.
test_binary_scalar!(
3012+
test_binary_array_lt_scalar,
3013+
vec![b"arrow", b"datafusion", b"flight", b"parquet"],
3014+
"flight".as_bytes(),
3015+
lt_binary_scalar,
3016+
vec![true, true, false, false]
3017+
);
3018+
3019+
// Less-than-or-equal, array vs array: the left values are compared against
// "flight"; the equal pair ("flight", "flight") is expected to be true.
test_binary!(
3020+
test_binary_array_lt_eq,
3021+
vec![b"arrow", b"datafusion", b"flight", b"parquet"],
3022+
vec![b"flight", b"flight", b"flight", b"flight"],
3023+
lt_eq_binary,
3024+
vec![true, true, true, false]
3025+
);
3026+
// Less-than-or-equal, array vs the scalar "flight": same inputs and expectations as above.
test_binary_scalar!(
3027+
test_binary_array_lt_eq_scalar,
3028+
vec![b"arrow", b"datafusion", b"flight", b"parquet"],
3029+
"flight".as_bytes(),
3030+
lt_eq_binary_scalar,
3031+
vec![true, true, true, false]
3032+
);
3033+
3034+
// Greater-than, array vs array: the left values are compared against
// "flight"; only "parquet" is expected to be greater.
test_binary!(
3035+
test_binary_array_gt,
3036+
vec![b"arrow", b"datafusion", b"flight", b"parquet"],
3037+
vec![b"flight", b"flight", b"flight", b"flight"],
3038+
gt_binary,
3039+
vec![false, false, false, true]
3040+
);
3041+
// Greater-than, array vs the scalar "flight": same inputs and expectations as above.
test_binary_scalar!(
3042+
test_binary_array_gt_scalar,
3043+
vec![b"arrow", b"datafusion", b"flight", b"parquet"],
3044+
"flight".as_bytes(),
3045+
gt_binary_scalar,
3046+
vec![false, false, false, true]
3047+
);
3048+
3049+
// Greater-than-or-equal, array vs array: the left values are compared against
// "flight"; the equal pair and "parquet" are expected to be true.
test_binary!(
3050+
test_binary_array_gt_eq,
3051+
vec![b"arrow", b"datafusion", b"flight", b"parquet"],
3052+
vec![b"flight", b"flight", b"flight", b"flight"],
3053+
gt_eq_binary,
3054+
vec![false, false, true, true]
3055+
);
3056+
// Greater-than-or-equal, array vs the scalar "flight": same inputs and expectations as above.
test_binary_scalar!(
3057+
test_binary_array_gt_eq_scalar,
3058+
vec![b"arrow", b"datafusion", b"flight", b"parquet"],
3059+
"flight".as_bytes(),
3060+
gt_eq_binary_scalar,
3061+
vec![false, false, true, true]
3062+
);
3063+
27973064
// Expected behaviour:
27983065
// contains("ab", ["ab", "cd", null]) = true
27993066
// contains("ef", ["ab", "cd", null]) = false

0 commit comments

Comments
 (0)