Skip to content

Commit 4dc97b2

Browse files
authored
bench: add array_agg benchmark (#14302)
* bench: add array_agg benchmark * format * rename variable
1 parent f510800 commit 4dc97b2

File tree

2 files changed

+190
-0
lines changed

2 files changed

+190
-0
lines changed

datafusion/functions-aggregate/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,3 +66,7 @@ harness = false
6666
[[bench]]
6767
name = "sum"
6868
harness = false
69+
70+
[[bench]]
71+
name = "array_agg"
72+
harness = false
Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use std::sync::Arc;
19+
20+
use arrow::array::{Array, ArrayRef, ArrowPrimitiveType, AsArray, ListArray};
21+
use arrow::datatypes::Int64Type;
22+
use arrow::util::bench_util::create_primitive_array;
23+
use arrow_schema::Field;
24+
use criterion::{black_box, criterion_group, criterion_main, Criterion};
25+
use datafusion_expr::Accumulator;
26+
use datafusion_functions_aggregate::array_agg::ArrayAggAccumulator;
27+
28+
use arrow::util::test_util::seedable_rng;
29+
use arrow_buffer::{NullBufferBuilder, OffsetBuffer};
30+
use rand::distributions::{Distribution, Standard};
31+
use rand::Rng;
32+
33+
fn merge_batch_bench(c: &mut Criterion, name: &str, values: ArrayRef) {
34+
let list_item_data_type = values.as_list::<i32>().values().data_type().clone();
35+
c.bench_function(name, |b| {
36+
b.iter(|| {
37+
#[allow(clippy::unit_arg)]
38+
black_box(
39+
ArrayAggAccumulator::try_new(&list_item_data_type)
40+
.unwrap()
41+
.merge_batch(&[values.clone()])
42+
.unwrap(),
43+
)
44+
})
45+
});
46+
}
47+
48+
/// Create List array with the given item data type, null density, null locations and zero length lists density
49+
/// Creates an random (but fixed-seeded) array of a given size and null density
50+
pub fn create_list_array<T>(
51+
size: usize,
52+
null_density: f32,
53+
zero_length_lists_probability: f32,
54+
) -> ListArray
55+
where
56+
T: ArrowPrimitiveType,
57+
Standard: Distribution<T::Native>,
58+
{
59+
let mut nulls_builder = NullBufferBuilder::new(size);
60+
let mut rng = seedable_rng();
61+
62+
let offsets = OffsetBuffer::from_lengths((0..size).map(|_| {
63+
let is_null = rng.gen::<f32>() < null_density;
64+
65+
let mut length = rng.gen_range(1..10);
66+
67+
if is_null {
68+
nulls_builder.append_null();
69+
70+
if rng.gen::<f32>() <= zero_length_lists_probability {
71+
length = 0;
72+
}
73+
} else {
74+
nulls_builder.append_non_null();
75+
}
76+
77+
length
78+
}));
79+
80+
let length = *offsets.last().unwrap() as usize;
81+
82+
let values = create_primitive_array::<T>(length, 0.0);
83+
84+
let field = Field::new_list_field(T::DATA_TYPE, true);
85+
86+
ListArray::new(
87+
Arc::new(field),
88+
offsets,
89+
Arc::new(values),
90+
nulls_builder.finish(),
91+
)
92+
}
93+
94+
fn array_agg_benchmark(c: &mut Criterion) {
95+
let values = Arc::new(create_list_array::<Int64Type>(8192, 0.0, 1.0)) as ArrayRef;
96+
merge_batch_bench(c, "array_agg i64 merge_batch no nulls", values);
97+
98+
let values = Arc::new(create_list_array::<Int64Type>(8192, 1.0, 1.0)) as ArrayRef;
99+
merge_batch_bench(
100+
c,
101+
"array_agg i64 merge_batch all nulls, 100% of nulls point to a zero length array",
102+
values,
103+
);
104+
105+
let values = Arc::new(create_list_array::<Int64Type>(8192, 1.0, 0.9)) as ArrayRef;
106+
merge_batch_bench(
107+
c,
108+
"array_agg i64 merge_batch all nulls, 90% of nulls point to a zero length array",
109+
values,
110+
);
111+
112+
// All nulls point to a 0 length array
113+
114+
let values = Arc::new(create_list_array::<Int64Type>(8192, 0.3, 1.0)) as ArrayRef;
115+
merge_batch_bench(
116+
c,
117+
"array_agg i64 merge_batch 30% nulls, 100% of nulls point to a zero length array",
118+
values,
119+
);
120+
121+
let values = Arc::new(create_list_array::<Int64Type>(8192, 0.7, 1.0)) as ArrayRef;
122+
merge_batch_bench(
123+
c,
124+
"array_agg i64 merge_batch 70% nulls, 100% of nulls point to a zero length array",
125+
values,
126+
);
127+
128+
let values = Arc::new(create_list_array::<Int64Type>(8192, 0.3, 0.99)) as ArrayRef;
129+
merge_batch_bench(
130+
c,
131+
"array_agg i64 merge_batch 30% nulls, 99% of nulls point to a zero length array",
132+
values,
133+
);
134+
135+
let values = Arc::new(create_list_array::<Int64Type>(8192, 0.7, 0.99)) as ArrayRef;
136+
merge_batch_bench(
137+
c,
138+
"array_agg i64 merge_batch 70% nulls, 99% of nulls point to a zero length array",
139+
values,
140+
);
141+
142+
let values = Arc::new(create_list_array::<Int64Type>(8192, 0.3, 0.9)) as ArrayRef;
143+
merge_batch_bench(
144+
c,
145+
"array_agg i64 merge_batch 30% nulls, 90% of nulls point to a zero length array",
146+
values,
147+
);
148+
149+
let values = Arc::new(create_list_array::<Int64Type>(8192, 0.7, 0.9)) as ArrayRef;
150+
merge_batch_bench(
151+
c,
152+
"array_agg i64 merge_batch 70% nulls, 90% of nulls point to a zero length array",
153+
values,
154+
);
155+
156+
let values = Arc::new(create_list_array::<Int64Type>(8192, 0.3, 0.50)) as ArrayRef;
157+
merge_batch_bench(
158+
c,
159+
"array_agg i64 merge_batch 30% nulls, 50% of nulls point to a zero length array",
160+
values,
161+
);
162+
163+
let values = Arc::new(create_list_array::<Int64Type>(8192, 0.7, 0.50)) as ArrayRef;
164+
merge_batch_bench(
165+
c,
166+
"array_agg i64 merge_batch 70% nulls, 50% of nulls point to a zero length array",
167+
values,
168+
);
169+
170+
let values = Arc::new(create_list_array::<Int64Type>(8192, 0.3, 0.0)) as ArrayRef;
171+
merge_batch_bench(
172+
c,
173+
"array_agg i64 merge_batch 30% nulls, 0% of nulls point to a zero length array",
174+
values,
175+
);
176+
177+
let values = Arc::new(create_list_array::<Int64Type>(8192, 0.7, 0.0)) as ArrayRef;
178+
merge_batch_bench(
179+
c,
180+
"array_agg i64 merge_batch 70% nulls, 0% of nulls point to a zero length array",
181+
values,
182+
);
183+
}
184+
185+
criterion_group!(benches, array_agg_benchmark);
186+
criterion_main!(benches);

0 commit comments

Comments
 (0)