Skip to content

Commit 2d6352b

Browse files
authored
Remove explicit SIMD arithmetic kernels except for division/modulo (#1221)
* Extend arithmetic benchmarks
* Remove explicit SIMD arithmetic except for div/mod, because autovectorization generates better code
* Remove unneeded `return` keywords
1 parent 5e435e2 commit 2d6352b

File tree

3 files changed

+68
-237
lines changed

3 files changed

+68
-237
lines changed

arrow/benches/arithmetic_kernels.rs

Lines changed: 43 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ use std::sync::Arc;
2424

2525
extern crate arrow;
2626

27-
use arrow::compute::kernels::limit::*;
2827
use arrow::util::bench_util::*;
2928
use arrow::{array::*, datatypes::Float32Type};
3029
use arrow::{compute::kernels::arithmetic::*, util::test_util::seedable_rng};
@@ -59,44 +58,69 @@ fn bench_divide(arr_a: &ArrayRef, arr_b: &ArrayRef) {
5958
criterion::black_box(divide(arr_a, arr_b).unwrap());
6059
}
6160

61+
fn bench_divide_unchecked(arr_a: &ArrayRef, arr_b: &ArrayRef) {
62+
let arr_a = arr_a.as_any().downcast_ref::<Float32Array>().unwrap();
63+
let arr_b = arr_b.as_any().downcast_ref::<Float32Array>().unwrap();
64+
criterion::black_box(divide_unchecked(arr_a, arr_b).unwrap());
65+
}
66+
6267
fn bench_divide_scalar(array: &ArrayRef, divisor: f32) {
6368
let array = array.as_any().downcast_ref::<Float32Array>().unwrap();
6469
criterion::black_box(divide_scalar(array, divisor).unwrap());
6570
}
6671

67-
fn bench_limit(arr_a: &ArrayRef, max: usize) {
68-
criterion::black_box(limit(arr_a, max));
72+
fn bench_modulo(arr_a: &ArrayRef, arr_b: &ArrayRef) {
73+
let arr_a = arr_a.as_any().downcast_ref::<Float32Array>().unwrap();
74+
let arr_b = arr_b.as_any().downcast_ref::<Float32Array>().unwrap();
75+
criterion::black_box(modulus(arr_a, arr_b).unwrap());
76+
}
77+
78+
fn bench_modulo_scalar(array: &ArrayRef, divisor: f32) {
79+
let array = array.as_any().downcast_ref::<Float32Array>().unwrap();
80+
criterion::black_box(modulus_scalar(array, divisor).unwrap());
6981
}
7082

7183
fn add_benchmark(c: &mut Criterion) {
72-
let arr_a = create_array(512, false);
73-
let arr_b = create_array(512, false);
84+
const BATCH_SIZE: usize = 64 * 1024;
85+
let arr_a = create_array(BATCH_SIZE, false);
86+
let arr_b = create_array(BATCH_SIZE, false);
7487
let scalar = seedable_rng().gen();
7588

76-
c.bench_function("add 512", |b| b.iter(|| bench_add(&arr_a, &arr_b)));
77-
c.bench_function("subtract 512", |b| {
78-
b.iter(|| bench_subtract(&arr_a, &arr_b))
89+
c.bench_function("add", |b| b.iter(|| bench_add(&arr_a, &arr_b)));
90+
c.bench_function("subtract", |b| b.iter(|| bench_subtract(&arr_a, &arr_b)));
91+
c.bench_function("multiply", |b| b.iter(|| bench_multiply(&arr_a, &arr_b)));
92+
c.bench_function("divide", |b| b.iter(|| bench_divide(&arr_a, &arr_b)));
93+
c.bench_function("divide_unchecked", |b| {
94+
b.iter(|| bench_divide_unchecked(&arr_a, &arr_b))
7995
});
80-
c.bench_function("multiply 512", |b| {
81-
b.iter(|| bench_multiply(&arr_a, &arr_b))
82-
});
83-
c.bench_function("divide 512", |b| b.iter(|| bench_divide(&arr_a, &arr_b)));
84-
c.bench_function("divide_scalar 512", |b| {
96+
c.bench_function("divide_scalar", |b| {
8597
b.iter(|| bench_divide_scalar(&arr_a, scalar))
8698
});
87-
c.bench_function("limit 512, 512", |b| b.iter(|| bench_limit(&arr_a, 512)));
99+
c.bench_function("modulo", |b| b.iter(|| bench_modulo(&arr_a, &arr_b)));
100+
c.bench_function("modulo_scalar", |b| {
101+
b.iter(|| bench_modulo_scalar(&arr_a, scalar))
102+
});
88103

89-
let arr_a_nulls = create_array(512, false);
90-
let arr_b_nulls = create_array(512, false);
91-
c.bench_function("add_nulls_512", |b| {
104+
let arr_a_nulls = create_array(BATCH_SIZE, true);
105+
let arr_b_nulls = create_array(BATCH_SIZE, true);
106+
c.bench_function("add_nulls", |b| {
92107
b.iter(|| bench_add(&arr_a_nulls, &arr_b_nulls))
93108
});
94-
c.bench_function("divide_nulls_512", |b| {
109+
c.bench_function("divide_nulls", |b| {
95110
b.iter(|| bench_divide(&arr_a_nulls, &arr_b_nulls))
96111
});
97-
c.bench_function("divide_scalar_nulls_512", |b| {
112+
c.bench_function("divide_nulls_unchecked", |b| {
113+
b.iter(|| bench_divide_unchecked(&arr_a_nulls, &arr_b_nulls))
114+
});
115+
c.bench_function("divide_scalar_nulls", |b| {
98116
b.iter(|| bench_divide_scalar(&arr_a_nulls, scalar))
99117
});
118+
c.bench_function("modulo_nulls", |b| {
119+
b.iter(|| bench_modulo(&arr_a_nulls, &arr_b_nulls))
120+
});
121+
c.bench_function("modulo_scalar_nulls", |b| {
122+
b.iter(|| bench_modulo_scalar(&arr_a_nulls, scalar))
123+
});
100124
}
101125

102126
criterion_group!(benches, add_benchmark);

arrow/src/buffer/immutable.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ impl Buffer {
153153
///
154154
/// Note that this should be used cautiously, and the returned pointer should not be
155155
/// stored anywhere, to avoid dangling pointers.
156+
#[inline]
156157
pub fn as_ptr(&self) -> *const u8 {
157158
unsafe { self.data.ptr().as_ptr().add(self.offset) }
158159
}

0 commit comments

Comments
 (0)