Skip to content

Commit 8dd6abb

Browse files
Dandandan authored and alamb committed
ARROW-12032: [Rust] Optimize comparison kernels
This adds a function `from_trusted_len_iter_bool` to speed up the creation of an array for booleans. Benchmarks are a bit noisy, but seems to be ~10-20% faster for comparison kernels. This also has some positive effect on DataFusion queries, as they contain quite some (nested) comparisons in filters. For example, executing tpch query 6 in memory is ~7% faster. ``` Gnuplot not found, using plotters backend eq Float32 time: [54.204 us 54.284 us 54.364 us] change: [-29.087% -28.838% -28.581%] (p = 0.00 < 0.05) Performance has improved. Found 6 outliers among 100 measurements (6.00%) 5 (5.00%) low mild 1 (1.00%) high mild eq scalar Float32 time: [43.660 us 43.743 us 43.830 us] change: [-30.819% -30.545% -30.269%] (p = 0.00 < 0.05) Performance has improved. Found 5 outliers among 100 measurements (5.00%) 4 (4.00%) high mild 1 (1.00%) high severe neq Float32 time: [68.726 us 68.893 us 69.048 us] change: [-14.045% -13.772% -13.490%] (p = 0.00 < 0.05) Performance has improved. Found 1 outliers among 100 measurements (1.00%) 1 (1.00%) high mild neq scalar Float32 time: [46.251 us 46.322 us 46.395 us] change: [-12.204% -11.952% -11.702%] (p = 0.00 < 0.05) Performance has improved. Found 6 outliers among 100 measurements (6.00%) 1 (1.00%) low mild 5 (5.00%) high mild lt Float32 time: [50.264 us 50.438 us 50.613 us] change: [-21.300% -20.964% -20.649%] (p = 0.00 < 0.05) Performance has improved. lt scalar Float32 time: [48.847 us 48.929 us 49.013 us] change: [-10.132% -9.9180% -9.6910%] (p = 0.00 < 0.05) Performance has improved. Found 5 outliers among 100 measurements (5.00%) 4 (4.00%) high mild 1 (1.00%) high severe lt_eq Float32 time: [46.105 us 46.198 us 46.282 us] change: [-21.276% -20.966% -20.703%] (p = 0.00 < 0.05) Performance has improved. 
Found 18 outliers among 100 measurements (18.00%) 2 (2.00%) low severe 13 (13.00%) low mild 1 (1.00%) high mild 2 (2.00%) high severe lt_eq scalar Float32 time: [47.359 us 47.456 us 47.593 us] change: [+0.2766% +0.5240% +0.7821%] (p = 0.00 < 0.05) Change within noise threshold. Found 10 outliers among 100 measurements (10.00%) 8 (8.00%) high mild 2 (2.00%) high severe gt Float32 time: [57.313 us 57.363 us 57.412 us] change: [-18.328% -18.177% -18.031%] (p = 0.00 < 0.05) Performance has improved. Found 3 outliers among 100 measurements (3.00%) 2 (2.00%) low severe 1 (1.00%) low mild gt scalar Float32 time: [44.091 us 44.132 us 44.175 us] change: [-9.4233% -9.2747% -9.1273%] (p = 0.00 < 0.05) Performance has improved. Found 7 outliers among 100 measurements (7.00%) 4 (4.00%) low mild 3 (3.00%) high mild gt_eq Float32 time: [55.856 us 55.932 us 56.007 us] change: [-7.4997% -7.2656% -7.0334%] (p = 0.00 < 0.05) Performance has improved. Found 3 outliers among 100 measurements (3.00%) 1 (1.00%) low mild 2 (2.00%) high mild gt_eq scalar Float32 time: [42.365 us 42.419 us 42.482 us] change: [+0.5289% +0.7174% +0.9116%] (p = 0.00 < 0.05) Change within noise threshold. Found 5 outliers among 100 measurements (5.00%) 2 (2.00%) high mild 3 (3.00%) high severe ``` Closes #9759 from Dandandan/optimize_comparison Authored-by: Heres, Daniel <danielheres@gmail.com> Signed-off-by: Andrew Lamb <andrew@nerdnetworks.org>
1 parent fe6ef70 commit 8dd6abb

File tree

4 files changed

+187
-15
lines changed

4 files changed

+187
-15
lines changed

rust/arrow/benches/comparison_kernels.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,8 @@ fn bench_nlike_utf8_scalar(arr_a: &StringArray, value_b: &str) {
121121

122122
fn add_benchmark(c: &mut Criterion) {
123123
let size = 65536;
124-
let arr_a = create_primitive_array::<Float32Type>(size, 0.0);
125-
let arr_b = create_primitive_array::<Float32Type>(size, 0.0);
124+
let arr_a = create_primitive_array_with_seed::<Float32Type>(size, 0.0, 42);
125+
let arr_b = create_primitive_array_with_seed::<Float32Type>(size, 0.0, 43);
126126

127127
let arr_string = create_string_array(size, 0.0);
128128

rust/arrow/src/buffer/mutable.rs

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,61 @@ impl MutableBuffer {
415415
buffer
416416
}
417417

418+
/// Creates a [`MutableBuffer`] from a boolean [`Iterator`] with a trusted (upper) length.
419+
///
420+
/// # Example
421+
/// ```
422+
/// # use arrow::buffer::MutableBuffer;
423+
/// let v = vec![false, true, false];
424+
/// let iter = v.iter().map(|x| *x || true);
425+
/// let buffer = unsafe { MutableBuffer::from_trusted_len_iter_bool(iter) };
426+
/// assert_eq!(buffer.len(), 1) // 3 booleans have 1 byte
427+
/// ```
428+
/// # Safety
429+
/// This method assumes that the iterator's size is correct; it is undefined behavior
430+
/// to use it on an iterator that reports an incorrect length.
431+
// This implementation is required for two reasons:
432+
// 1. there is no trait `TrustedLen` in stable rust and therefore
433+
// we can't specialize `extend` for `TrustedLen` like `Vec` does.
434+
// 2. `from_trusted_len_iter_bool` is faster.
435+
pub unsafe fn from_trusted_len_iter_bool<I: Iterator<Item = bool>>(
436+
mut iterator: I,
437+
) -> Self {
438+
let (_, upper) = iterator.size_hint();
439+
let upper = upper.expect("from_trusted_len_iter requires an upper limit");
440+
441+
let mut result = {
442+
let byte_capacity: usize = upper.saturating_add(7) / 8;
443+
MutableBuffer::new(byte_capacity)
444+
};
445+
446+
'a: loop {
447+
let mut byte_accum: u8 = 0;
448+
let mut mask: u8 = 1;
449+
450+
//collect (up to) 8 bits into a byte
451+
while mask != 0 {
452+
if let Some(value) = iterator.next() {
453+
byte_accum |= match value {
454+
true => mask,
455+
false => 0,
456+
};
457+
mask <<= 1;
458+
} else {
459+
if mask != 1 {
460+
// Add last byte
461+
result.push_unchecked(byte_accum);
462+
}
463+
break 'a;
464+
}
465+
}
466+
467+
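// Soundness: the iterator's length is trusted (see `# Safety`), so no more than the `byte_capacity` bytes requested above are pushed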
468+
result.push_unchecked(byte_accum);
469+
}
470+
result
471+
}
472+
418473
/// Creates a [`MutableBuffer`] from an [`Iterator`] with a trusted (upper) length or errors
419474
/// if any of the items of the iterator is an error.
420475
/// Prefer this to `collect` whenever possible, as it is ~60% faster.

rust/arrow/src/compute/kernels/comparison.rs

Lines changed: 102 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -46,17 +46,72 @@ macro_rules! compare_op {
4646
let null_bit_buffer =
4747
combine_option_bitmap($left.data_ref(), $right.data_ref(), $left.len())?;
4848

49-
let buffer = (0..$left.len())
50-
.map(|i| $op($left.value(i), $right.value(i)))
51-
.collect();
49+
let comparison = (0..$left.len()).map(|i| $op($left.value(i), $right.value(i)));
50+
// same size as $left.len() and $right.len()
51+
let buffer = unsafe { MutableBuffer::from_trusted_len_iter_bool(comparison) };
5252

5353
let data = ArrayData::new(
5454
DataType::Boolean,
5555
$left.len(),
5656
None,
5757
null_bit_buffer,
5858
0,
59-
vec![buffer],
59+
vec![Buffer::from(buffer)],
60+
vec![],
61+
);
62+
Ok(BooleanArray::from(Arc::new(data)))
63+
}};
64+
}
65+
66+
macro_rules! compare_op_primitive {
67+
($left: expr, $right:expr, $op:expr) => {{
68+
if $left.len() != $right.len() {
69+
return Err(ArrowError::ComputeError(
70+
"Cannot perform comparison operation on arrays of different length"
71+
.to_string(),
72+
));
73+
}
74+
75+
let null_bit_buffer =
76+
combine_option_bitmap($left.data_ref(), $right.data_ref(), $left.len())?;
77+
78+
let mut values = MutableBuffer::from_len_zeroed(($left.len() + 7) / 8);
79+
let lhs_chunks_iter = $left.values().chunks_exact(8);
80+
let lhs_remainder = lhs_chunks_iter.remainder();
81+
let rhs_chunks_iter = $right.values().chunks_exact(8);
82+
let rhs_remainder = rhs_chunks_iter.remainder();
83+
let chunks = $left.len() / 8;
84+
85+
values[..chunks]
86+
.iter_mut()
87+
.zip(lhs_chunks_iter)
88+
.zip(rhs_chunks_iter)
89+
.for_each(|((byte, lhs), rhs)| {
90+
lhs.iter()
91+
.zip(rhs.iter())
92+
.enumerate()
93+
.for_each(|(i, (&lhs, &rhs))| {
94+
*byte |= if $op(lhs, rhs) { 1 << i } else { 0 };
95+
});
96+
});
97+
98+
if !lhs_remainder.is_empty() {
99+
let last = &mut values[chunks];
100+
lhs_remainder
101+
.iter()
102+
.zip(rhs_remainder.iter())
103+
.enumerate()
104+
.for_each(|(i, (&lhs, &rhs))| {
105+
*last |= if $op(lhs, rhs) { 1 << i } else { 0 };
106+
});
107+
};
108+
let data = ArrayData::new(
109+
DataType::Boolean,
110+
$left.len(),
111+
None,
112+
null_bit_buffer,
113+
0,
114+
vec![Buffer::from(values)],
60115
vec![],
61116
);
62117
Ok(BooleanArray::from(data))
@@ -67,17 +122,54 @@ macro_rules! compare_op_scalar {
67122
($left: expr, $right:expr, $op:expr) => {{
68123
let null_bit_buffer = $left.data().null_buffer().cloned();
69124

70-
let buffer = (0..$left.len())
71-
.map(|i| $op($left.value(i), $right))
72-
.collect();
125+
let comparison = (0..$left.len()).map(|i| $op($left.value(i), $right));
126+
// same as $left.len()
127+
let buffer = unsafe { MutableBuffer::from_trusted_len_iter_bool(comparison) };
128+
129+
let data = ArrayData::new(
130+
DataType::Boolean,
131+
$left.len(),
132+
None,
133+
null_bit_buffer,
134+
0,
135+
vec![Buffer::from(buffer)],
136+
vec![],
137+
);
138+
Ok(BooleanArray::from(Arc::new(data)))
139+
}};
140+
}
141+
142+
macro_rules! compare_op_scalar_primitive {
143+
($left: expr, $right:expr, $op:expr) => {{
144+
let null_bit_buffer = $left.data().null_buffer().cloned();
145+
146+
let mut values = MutableBuffer::from_len_zeroed(($left.len() + 7) / 8);
147+
let lhs_chunks_iter = $left.values().chunks_exact(8);
148+
let lhs_remainder = lhs_chunks_iter.remainder();
149+
let chunks = $left.len() / 8;
150+
151+
values[..chunks]
152+
.iter_mut()
153+
.zip(lhs_chunks_iter)
154+
.for_each(|(byte, chunk)| {
155+
chunk.iter().enumerate().for_each(|(i, &c_i)| {
156+
*byte |= if $op(c_i, $right) { 1 << i } else { 0 };
157+
});
158+
});
159+
if !lhs_remainder.is_empty() {
160+
let last = &mut values[chunks];
161+
lhs_remainder.iter().enumerate().for_each(|(i, &lhs)| {
162+
*last |= if $op(lhs, $right) { 1 << i } else { 0 };
163+
});
164+
};
73165

74166
let data = ArrayData::new(
75167
DataType::Boolean,
76168
$left.len(),
77169
None,
78170
null_bit_buffer,
79171
0,
80-
vec![buffer],
172+
vec![Buffer::from(values)],
81173
vec![],
82174
);
83175
Ok(BooleanArray::from(data))
@@ -95,7 +187,7 @@ where
95187
T: ArrowNumericType,
96188
F: Fn(T::Native, T::Native) -> bool,
97189
{
98-
compare_op!(left, right, op)
190+
compare_op_primitive!(left, right, op)
99191
}
100192

101193
/// Evaluate `op(left, right)` for [`PrimitiveArray`] and scalar using
@@ -109,7 +201,7 @@ where
109201
T: ArrowNumericType,
110202
F: Fn(T::Native, T::Native) -> bool,
111203
{
112-
compare_op_scalar!(left, right, op)
204+
compare_op_scalar_primitive!(left, right, op)
113205
}
114206

115207
/// Perform SQL `left LIKE right` operation on [`StringArray`] / [`LargeStringArray`].

rust/arrow/src/util/bench_util.rs

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,15 @@
1717

1818
//! Utils to make benchmarking easier
1919
20-
use rand::distributions::{Alphanumeric, Distribution, Standard};
21-
use rand::Rng;
22-
2320
use crate::array::*;
2421
use crate::datatypes::*;
2522
use crate::util::test_util::seedable_rng;
23+
use rand::Rng;
24+
use rand::SeedableRng;
25+
use rand::{
26+
distributions::{Alphanumeric, Distribution, Standard},
27+
prelude::StdRng,
28+
};
2629

2730
/// Creates a random (but fixed-seeded) array of a given size and null density
2831
pub fn create_primitive_array<T>(size: usize, null_density: f32) -> PrimitiveArray<T>
@@ -43,6 +46,28 @@ where
4346
.collect()
4447
}
4548

49+
pub fn create_primitive_array_with_seed<T>(
50+
size: usize,
51+
null_density: f32,
52+
seed: u64,
53+
) -> PrimitiveArray<T>
54+
where
55+
T: ArrowPrimitiveType,
56+
Standard: Distribution<T::Native>,
57+
{
58+
let mut rng = StdRng::seed_from_u64(seed);
59+
60+
(0..size)
61+
.map(|_| {
62+
if rng.gen::<f32>() < null_density {
63+
None
64+
} else {
65+
Some(rng.gen())
66+
}
67+
})
68+
.collect()
69+
}
70+
4671
/// Creates a random (but fixed-seeded) array of a given size and null density
4772
pub fn create_boolean_array(
4873
size: usize,

0 commit comments

Comments
 (0)