Skip to content

Commit 5f39c3f

Browse files
Stage benchmark reference
1 parent d20b6d1 commit 5f39c3f

File tree

1 file changed

+156
-45
lines changed

1 file changed

+156
-45
lines changed

datafusion/functions/benches/to_char.rs

Lines changed: 156 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,10 @@ use rand::seq::SliceRandom;
2929
use rand::Rng;
3030

3131
use datafusion_common::ScalarValue;
32-
use datafusion_common::ScalarValue::TimestampNanosecond;
3332
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
3433
use datafusion_functions::datetime::to_char;
3534

36-
fn random_date_in_range(
35+
fn pick_date_in_range(
3736
rng: &mut ThreadRng,
3837
start_date: NaiveDate,
3938
end_date: NaiveDate,
@@ -43,7 +42,7 @@ fn random_date_in_range(
4342
start_date + TimeDelta::try_days(random_days).unwrap()
4443
}
4544

46-
fn data(rng: &mut ThreadRng) -> Date32Array {
45+
fn generate_date32_array(rng: &mut ThreadRng) -> Date32Array {
4746
let mut data: Vec<i32> = vec![];
4847
let unix_days_from_ce = NaiveDate::from_ymd_opt(1970, 1, 1)
4948
.unwrap()
@@ -56,37 +55,83 @@ fn data(rng: &mut ThreadRng) -> Date32Array {
5655
.expect("Date should parse");
5756
for _ in 0..1000 {
5857
data.push(
59-
random_date_in_range(rng, start_date, end_date).num_days_from_ce()
58+
pick_date_in_range(rng, start_date, end_date).num_days_from_ce()
6059
- unix_days_from_ce,
6160
);
6261
}
6362

6463
Date32Array::from(data)
6564
}
6665

67-
fn patterns(rng: &mut ThreadRng) -> StringArray {
68-
let samples = [
69-
"%Y:%m:%d".to_string(),
70-
"%d-%m-%Y".to_string(),
71-
"%d%m%Y".to_string(),
72-
"%Y%m%d".to_string(),
73-
"%Y...%m...%d".to_string(),
74-
];
75-
let mut data: Vec<String> = vec![];
66+
/// Format patterns built only from the date specifiers `%Y`, `%m` and `%d`.
///
/// `pick_date_pattern` samples from this list when a benchmark needs a
/// date-only pattern.
// `'static` is implied for references in a `const`, so it is not spelled out.
const DATE_PATTERNS: [&str; 5] =
    ["%Y:%m:%d", "%d-%m-%Y", "%d%m%Y", "%Y%m%d", "%Y...%m...%d"];
68+
69+
/// Format patterns that combine date specifiers with time-of-day specifiers
/// (hours, minutes, seconds, fractional seconds, AM/PM, and `%c`).
///
/// `pick_date_time_pattern` samples from this list when a benchmark needs a
/// pattern that includes a time component.
// `'static` is implied for references in a `const`, so it is not spelled out.
const DATETIME_PATTERNS: [&str; 8] = [
    "%Y:%m:%d %H:%M%S",
    "%Y:%m:%d %_H:%M%S",
    "%Y:%m:%d %k:%M%S",
    "%d-%m-%Y %I%P-%M-%S %f",
    "%d%m%Y %H",
    "%Y%m%d %M-%S %.3f",
    "%Y...%m...%d %T%3f",
    "%c",
];
79+
80+
fn pick_date_pattern(rng: &mut ThreadRng) -> String {
81+
DATE_PATTERNS
82+
.choose(rng)
83+
.expect("Empty list of date patterns")
84+
.to_string()
85+
}
86+
87+
fn pick_date_time_pattern(rng: &mut ThreadRng) -> String {
88+
DATETIME_PATTERNS
89+
.choose(rng)
90+
.expect("Empty list of date time patterns")
91+
.to_string()
92+
}
93+
94+
fn pick_date_and_date_time_mixed_pattern(rng: &mut ThreadRng) -> String {
95+
match rng.gen_bool(0.5) {
96+
true => pick_date_pattern(rng),
97+
false => pick_date_time_pattern(rng),
98+
}
99+
}
100+
101+
fn generate_pattern_array(
102+
rng: &mut ThreadRng,
103+
mut pick_fn: impl FnMut(&mut ThreadRng) -> String,
104+
) -> StringArray {
105+
let mut data = Vec::with_capacity(1000);
106+
76107
for _ in 0..1000 {
77-
data.push(samples.choose(rng).unwrap().to_string());
108+
data.push(pick_fn(rng));
78109
}
79110

80111
StringArray::from(data)
81112
}
82113

114+
fn generate_date_pattern_array(rng: &mut ThreadRng) -> StringArray {
115+
generate_pattern_array(rng, pick_date_pattern)
116+
}
117+
118+
fn generate_datetime_pattern_array(rng: &mut ThreadRng) -> StringArray {
119+
generate_pattern_array(rng, pick_date_time_pattern)
120+
}
121+
122+
fn generate_mixed_pattern_array(rng: &mut ThreadRng) -> StringArray {
123+
generate_pattern_array(rng, pick_date_and_date_time_mixed_pattern)
124+
}
125+
83126
fn criterion_benchmark(c: &mut Criterion) {
84-
c.bench_function("to_char_array_array_1000", |b| {
127+
c.bench_function("to_char_array_date_only_patterns_1000", |b| {
85128
let mut rng = rand::thread_rng();
86-
let data_arr = data(&mut rng);
129+
let data_arr = generate_date32_array(&mut rng);
87130
let batch_len = data_arr.len();
88131
let data = ColumnarValue::Array(Arc::new(data_arr) as ArrayRef);
89-
let patterns = ColumnarValue::Array(Arc::new(patterns(&mut rng)) as ArrayRef);
132+
let patterns = ColumnarValue::Array(Arc::new(generate_date_pattern_array(
133+
&mut rng,
134+
)) as ArrayRef);
90135

91136
b.iter(|| {
92137
black_box(
@@ -101,13 +146,57 @@ fn criterion_benchmark(c: &mut Criterion) {
101146
})
102147
});
103148

104-
c.bench_function("to_char_array_scalar_1000", |b| {
149+
// c.bench_function("to_char_array_datetime_patterns_1000", |b| {
150+
// let mut rng = rand::thread_rng();
151+
// let data_arr = generate_date32_array(&mut rng);
152+
// let batch_len = data_arr.len();
153+
// let data = ColumnarValue::Array(Arc::new(data_arr) as ArrayRef);
154+
// let patterns = ColumnarValue::Array(Arc::new(generate_datetime_pattern_array(
155+
// &mut rng,
156+
// )) as ArrayRef);
157+
//
158+
// b.iter(|| {
159+
// black_box(
160+
// to_char()
161+
// .invoke_with_args(ScalarFunctionArgs {
162+
// args: vec![data.clone(), patterns.clone()],
163+
// number_rows: batch_len,
164+
// return_type: &DataType::Utf8,
165+
// })
166+
// .expect("to_char should work on valid values"),
167+
// )
168+
// })
169+
// });
170+
171+
// c.bench_function("to_char_array_mixed_patterns_1000", |b| {
172+
// let mut rng = rand::thread_rng();
173+
// let data_arr = generate_date32_array(&mut rng);
174+
// let batch_len = data_arr.len();
175+
// let data = ColumnarValue::Array(Arc::new(data_arr) as ArrayRef);
176+
// let patterns = ColumnarValue::Array(Arc::new(generate_mixed_pattern_array(
177+
// &mut rng,
178+
// )) as ArrayRef);
179+
//
180+
// b.iter(|| {
181+
// black_box(
182+
// to_char()
183+
// .invoke_with_args(ScalarFunctionArgs {
184+
// args: vec![data.clone(), patterns.clone()],
185+
// number_rows: batch_len,
186+
// return_type: &DataType::Utf8,
187+
// })
188+
// .expect("to_char should work on valid values"),
189+
// )
190+
// })
191+
// });
192+
193+
c.bench_function("to_char_scalar_date_only_pattern_1000", |b| {
105194
let mut rng = rand::thread_rng();
106-
let data_arr = data(&mut rng);
195+
let data_arr = generate_date32_array(&mut rng);
107196
let batch_len = data_arr.len();
108197
let data = ColumnarValue::Array(Arc::new(data_arr) as ArrayRef);
109198
let patterns =
110-
ColumnarValue::Scalar(ScalarValue::Utf8(Some("%Y-%m-%d".to_string())));
199+
ColumnarValue::Scalar(ScalarValue::Utf8(Some(pick_date_pattern(&mut rng))));
111200

112201
b.iter(|| {
113202
black_box(
@@ -122,32 +211,54 @@ fn criterion_benchmark(c: &mut Criterion) {
122211
})
123212
});
124213

125-
c.bench_function("to_char_scalar_scalar_1000", |b| {
126-
let timestamp = "2026-07-08T09:10:11"
127-
.parse::<NaiveDateTime>()
128-
.unwrap()
129-
.with_nanosecond(56789)
130-
.unwrap()
131-
.and_utc()
132-
.timestamp_nanos_opt()
133-
.unwrap();
134-
let data = ColumnarValue::Scalar(TimestampNanosecond(Some(timestamp), None));
135-
let pattern = ColumnarValue::Scalar(ScalarValue::Utf8(Some(
136-
"%d-%m-%Y %H:%M:%S".to_string(),
137-
)));
214+
// c.bench_function("to_char_scalar_datetime_pattern_1000", |b| {
215+
// let mut rng = rand::thread_rng();
216+
// let data_arr = generate_date32_array(&mut rng);
217+
// let batch_len = data_arr.len();
218+
// let data = ColumnarValue::Array(Arc::new(data_arr) as ArrayRef);
219+
// let patterns = ColumnarValue::Scalar(ScalarValue::Utf8(Some(
220+
// pick_date_time_pattern(&mut rng),
221+
// )));
222+
//
223+
// b.iter(|| {
224+
// black_box(
225+
// to_char()
226+
// .invoke_with_args(ScalarFunctionArgs {
227+
// args: vec![data.clone(), patterns.clone()],
228+
// number_rows: batch_len,
229+
// return_type: &DataType::Utf8,
230+
// })
231+
// .expect("to_char should work on valid values"),
232+
// )
233+
// })
234+
// });
138235

139-
b.iter(|| {
140-
black_box(
141-
to_char()
142-
.invoke_with_args(ScalarFunctionArgs {
143-
args: vec![data.clone(), pattern.clone()],
144-
number_rows: 1,
145-
return_type: &DataType::Utf8,
146-
})
147-
.expect("to_char should work on valid values"),
148-
)
149-
})
150-
});
236+
// c.bench_function("to_char_scalar_1000", |b| {
237+
// let mut rng = rand::thread_rng();
238+
// let timestamp = "2026-07-08T09:10:11"
239+
// .parse::<NaiveDateTime>()
240+
// .unwrap()
241+
// .with_nanosecond(56789)
242+
// .unwrap()
243+
// .and_utc()
244+
// .timestamp_nanos_opt()
245+
// .unwrap();
246+
// let data = ColumnarValue::Scalar(TimestampNanosecond(Some(timestamp), None));
247+
// let pattern =
248+
// ColumnarValue::Scalar(ScalarValue::Utf8(Some(pick_date_pattern(&mut rng))));
249+
//
250+
// b.iter(|| {
251+
// black_box(
252+
// to_char()
253+
// .invoke_with_args(ScalarFunctionArgs {
254+
// args: vec![data.clone(), pattern.clone()],
255+
// number_rows: 1,
256+
// return_type: &DataType::Utf8,
257+
// })
258+
// .expect("to_char should work on valid values"),
259+
// )
260+
// })
261+
// });
151262
}
152263

153264
criterion_group!(benches, criterion_benchmark);

0 commit comments

Comments
 (0)