Skip to content

Commit 44e63e0

Browse files
Benchmark for char expression (#16743)
* add benchmark for char function
* add license header
* add benchmarking
* update return type
* chore: fix benchmark

---------

Co-authored-by: Yuri Astrakhan <YuriAstrakhan@gmail.com>
1 parent d3cacac commit 44e63e0

File tree

3 files changed

+84
-0
lines changed

3 files changed

+84
-0
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/spark/Cargo.toml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,11 @@ datafusion-expr = { workspace = true }
4444
datafusion-functions = { workspace = true, features = ["crypto_expressions"] }
4545
datafusion-macros = { workspace = true }
4646
log = { workspace = true }
47+
48+
[dev-dependencies]
49+
criterion = { workspace = true }
50+
rand = { workspace = true }
51+
52+
# Benchmark target for benches/char.rs. `harness = false` turns off the default
# libtest harness because the benchmark file supplies its own entry point
# through Criterion's `criterion_main!` macro.
[[bench]]
53+
harness = false
54+
name = "char"

datafusion/spark/benches/char.rs

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
extern crate criterion;
19+
20+
use arrow::datatypes::{DataType, Field};
21+
use arrow::{array::PrimitiveArray, datatypes::Int64Type};
22+
use criterion::{black_box, criterion_group, criterion_main, Criterion};
23+
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
24+
use datafusion_spark::function::string::char;
25+
use rand::rngs::StdRng;
26+
use rand::{Rng, SeedableRng};
27+
use std::sync::Arc;
28+
29+
/// Returns fixed seedable RNG
30+
pub fn seedable_rng() -> StdRng {
31+
StdRng::seed_from_u64(42)
32+
}
33+
34+
fn criterion_benchmark(c: &mut Criterion) {
35+
let cot_fn = char();
36+
let size = 1024;
37+
let input: PrimitiveArray<Int64Type> = {
38+
let null_density = 0.2;
39+
let mut rng = StdRng::seed_from_u64(42);
40+
(0..size)
41+
.map(|_| {
42+
if rng.random::<f32>() < null_density {
43+
None
44+
} else {
45+
Some(rng.random_range::<i64, _>(1i64..10_000))
46+
}
47+
})
48+
.collect()
49+
};
50+
let input = Arc::new(input);
51+
let args = vec![ColumnarValue::Array(input)];
52+
let arg_fields = args
53+
.iter()
54+
.enumerate()
55+
.map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into())
56+
.collect::<Vec<_>>();
57+
58+
c.bench_function("char", |b| {
59+
b.iter(|| {
60+
black_box(
61+
cot_fn
62+
.invoke_with_args(ScalarFunctionArgs {
63+
args: args.clone(),
64+
arg_fields: arg_fields.clone(),
65+
number_rows: size,
66+
return_field: Arc::new(Field::new("f", DataType::Utf8, true)),
67+
})
68+
.unwrap(),
69+
)
70+
})
71+
});
72+
}
73+
// Register `criterion_benchmark` in a Criterion group named `benches`, then
// let `criterion_main!` generate the entry point that runs that group.
criterion_group!(benches, criterion_benchmark);
74+
criterion_main!(benches);

0 commit comments

Comments
 (0)