@@ -29,11 +29,10 @@ use rand::seq::SliceRandom;
29
29
use rand:: Rng ;
30
30
31
31
use datafusion_common:: ScalarValue ;
32
- use datafusion_common:: ScalarValue :: TimestampNanosecond ;
33
32
use datafusion_expr:: { ColumnarValue , ScalarFunctionArgs } ;
34
33
use datafusion_functions:: datetime:: to_char;
35
34
36
- fn random_date_in_range (
35
+ fn pick_date_in_range (
37
36
rng : & mut ThreadRng ,
38
37
start_date : NaiveDate ,
39
38
end_date : NaiveDate ,
@@ -43,7 +42,7 @@ fn random_date_in_range(
43
42
start_date + TimeDelta :: try_days ( random_days) . unwrap ( )
44
43
}
45
44
46
- fn data ( rng : & mut ThreadRng ) -> Date32Array {
45
+ fn generate_date32_array ( rng : & mut ThreadRng ) -> Date32Array {
47
46
let mut data: Vec < i32 > = vec ! [ ] ;
48
47
let unix_days_from_ce = NaiveDate :: from_ymd_opt ( 1970 , 1 , 1 )
49
48
. unwrap ( )
@@ -56,37 +55,83 @@ fn data(rng: &mut ThreadRng) -> Date32Array {
56
55
. expect ( "Date should parse" ) ;
57
56
for _ in 0 ..1000 {
58
57
data. push (
59
- random_date_in_range ( rng, start_date, end_date) . num_days_from_ce ( )
58
+ pick_date_in_range ( rng, start_date, end_date) . num_days_from_ce ( )
60
59
- unix_days_from_ce,
61
60
) ;
62
61
}
63
62
64
63
Date32Array :: from ( data)
65
64
}
66
65
67
- fn patterns ( rng : & mut ThreadRng ) -> StringArray {
68
- let samples = [
69
- "%Y:%m:%d" . to_string ( ) ,
70
- "%d-%m-%Y" . to_string ( ) ,
71
- "%d%m%Y" . to_string ( ) ,
72
- "%Y%m%d" . to_string ( ) ,
73
- "%Y...%m...%d" . to_string ( ) ,
74
- ] ;
75
- let mut data: Vec < String > = vec ! [ ] ;
66
/// Format patterns built only from the `%Y`, `%m` and `%d` specifiers.
///
/// `pick_date_pattern` samples one entry from this list at random.
// The `'static` lifetime is implied for references in a `const`, so it is
// not spelled out (clippy: `redundant_static_lifetimes`).
const DATE_PATTERNS: [&str; 5] =
    ["%Y:%m:%d", "%d-%m-%Y", "%d%m%Y", "%Y%m%d", "%Y...%m...%d"];
68
+
69
/// Format patterns that combine date specifiers with time-of-day specifiers
/// (hours, minutes, seconds, fractional seconds, or the combined `%c`).
///
/// `pick_date_time_pattern` samples one entry from this list at random.
// The `'static` lifetime is implied for references in a `const`, so it is
// not spelled out (clippy: `redundant_static_lifetimes`).
const DATETIME_PATTERNS: [&str; 8] = [
    "%Y:%m:%d %H:%M%S",
    "%Y:%m:%d %_H:%M%S",
    "%Y:%m:%d %k:%M%S",
    "%d-%m-%Y %I%P-%M-%S %f",
    "%d%m%Y %H",
    "%Y%m%d %M-%S %.3f",
    "%Y...%m...%d %T%3f",
    "%c",
];
79
+
80
+ fn pick_date_pattern ( rng : & mut ThreadRng ) -> String {
81
+ DATE_PATTERNS
82
+ . choose ( rng)
83
+ . expect ( "Empty list of date patterns" )
84
+ . to_string ( )
85
+ }
86
+
87
+ fn pick_date_time_pattern ( rng : & mut ThreadRng ) -> String {
88
+ DATETIME_PATTERNS
89
+ . choose ( rng)
90
+ . expect ( "Empty list of date time patterns" )
91
+ . to_string ( )
92
+ }
93
+
94
+ fn pick_date_and_date_time_mixed_pattern ( rng : & mut ThreadRng ) -> String {
95
+ match rng. gen_bool ( 0.5 ) {
96
+ true => pick_date_pattern ( rng) ,
97
+ false => pick_date_time_pattern ( rng) ,
98
+ }
99
+ }
100
+
101
+ fn generate_pattern_array (
102
+ rng : & mut ThreadRng ,
103
+ mut pick_fn : impl FnMut ( & mut ThreadRng ) -> String ,
104
+ ) -> StringArray {
105
+ let mut data = Vec :: with_capacity ( 1000 ) ;
106
+
76
107
for _ in 0 ..1000 {
77
- data. push ( samples . choose ( rng) . unwrap ( ) . to_string ( ) ) ;
108
+ data. push ( pick_fn ( rng) ) ;
78
109
}
79
110
80
111
StringArray :: from ( data)
81
112
}
82
113
114
+ fn generate_date_pattern_array ( rng : & mut ThreadRng ) -> StringArray {
115
+ generate_pattern_array ( rng, pick_date_pattern)
116
+ }
117
+
118
+ fn generate_datetime_pattern_array ( rng : & mut ThreadRng ) -> StringArray {
119
+ generate_pattern_array ( rng, pick_date_time_pattern)
120
+ }
121
+
122
+ fn generate_mixed_pattern_array ( rng : & mut ThreadRng ) -> StringArray {
123
+ generate_pattern_array ( rng, pick_date_and_date_time_mixed_pattern)
124
+ }
125
+
83
126
fn criterion_benchmark ( c : & mut Criterion ) {
84
- c. bench_function ( "to_char_array_array_1000 " , |b| {
127
+ c. bench_function ( "to_char_array_date_only_patterns_1000 " , |b| {
85
128
let mut rng = rand:: thread_rng ( ) ;
86
- let data_arr = data ( & mut rng) ;
129
+ let data_arr = generate_date32_array ( & mut rng) ;
87
130
let batch_len = data_arr. len ( ) ;
88
131
let data = ColumnarValue :: Array ( Arc :: new ( data_arr) as ArrayRef ) ;
89
- let patterns = ColumnarValue :: Array ( Arc :: new ( patterns ( & mut rng) ) as ArrayRef ) ;
132
+ let patterns = ColumnarValue :: Array ( Arc :: new ( generate_date_pattern_array (
133
+ & mut rng,
134
+ ) ) as ArrayRef ) ;
90
135
91
136
b. iter ( || {
92
137
black_box (
@@ -101,13 +146,57 @@ fn criterion_benchmark(c: &mut Criterion) {
101
146
} )
102
147
} ) ;
103
148
104
- c. bench_function ( "to_char_array_scalar_1000" , |b| {
149
+ // c.bench_function("to_char_array_datetime_patterns_1000", |b| {
150
+ // let mut rng = rand::thread_rng();
151
+ // let data_arr = generate_date32_array(&mut rng);
152
+ // let batch_len = data_arr.len();
153
+ // let data = ColumnarValue::Array(Arc::new(data_arr) as ArrayRef);
154
+ // let patterns = ColumnarValue::Array(Arc::new(generate_datetime_pattern_array(
155
+ // &mut rng,
156
+ // )) as ArrayRef);
157
+ //
158
+ // b.iter(|| {
159
+ // black_box(
160
+ // to_char()
161
+ // .invoke_with_args(ScalarFunctionArgs {
162
+ // args: vec![data.clone(), patterns.clone()],
163
+ // number_rows: batch_len,
164
+ // return_type: &DataType::Utf8,
165
+ // })
166
+ // .expect("to_char should work on valid values"),
167
+ // )
168
+ // })
169
+ // });
170
+
171
+ // c.bench_function("to_char_array_mixed_patterns_1000", |b| {
172
+ // let mut rng = rand::thread_rng();
173
+ // let data_arr = generate_date32_array(&mut rng);
174
+ // let batch_len = data_arr.len();
175
+ // let data = ColumnarValue::Array(Arc::new(data_arr) as ArrayRef);
176
+ // let patterns = ColumnarValue::Array(Arc::new(generate_mixed_pattern_array(
177
+ // &mut rng,
178
+ // )) as ArrayRef);
179
+ //
180
+ // b.iter(|| {
181
+ // black_box(
182
+ // to_char()
183
+ // .invoke_with_args(ScalarFunctionArgs {
184
+ // args: vec![data.clone(), patterns.clone()],
185
+ // number_rows: batch_len,
186
+ // return_type: &DataType::Utf8,
187
+ // })
188
+ // .expect("to_char should work on valid values"),
189
+ // )
190
+ // })
191
+ // });
192
+
193
+ c. bench_function ( "to_char_scalar_date_only_pattern_1000" , |b| {
105
194
let mut rng = rand:: thread_rng ( ) ;
106
- let data_arr = data ( & mut rng) ;
195
+ let data_arr = generate_date32_array ( & mut rng) ;
107
196
let batch_len = data_arr. len ( ) ;
108
197
let data = ColumnarValue :: Array ( Arc :: new ( data_arr) as ArrayRef ) ;
109
198
let patterns =
110
- ColumnarValue :: Scalar ( ScalarValue :: Utf8 ( Some ( "%Y-%m-%d" . to_string ( ) ) ) ) ;
199
+ ColumnarValue :: Scalar ( ScalarValue :: Utf8 ( Some ( pick_date_pattern ( & mut rng ) ) ) ) ;
111
200
112
201
b. iter ( || {
113
202
black_box (
@@ -122,32 +211,54 @@ fn criterion_benchmark(c: &mut Criterion) {
122
211
} )
123
212
} ) ;
124
213
125
- c. bench_function ( "to_char_scalar_scalar_1000" , |b| {
126
- let timestamp = "2026-07-08T09:10:11"
127
- . parse :: < NaiveDateTime > ( )
128
- . unwrap ( )
129
- . with_nanosecond ( 56789 )
130
- . unwrap ( )
131
- . and_utc ( )
132
- . timestamp_nanos_opt ( )
133
- . unwrap ( ) ;
134
- let data = ColumnarValue :: Scalar ( TimestampNanosecond ( Some ( timestamp) , None ) ) ;
135
- let pattern = ColumnarValue :: Scalar ( ScalarValue :: Utf8 ( Some (
136
- "%d-%m-%Y %H:%M:%S" . to_string ( ) ,
137
- ) ) ) ;
214
+ // c.bench_function("to_char_scalar_datetime_pattern_1000", |b| {
215
+ // let mut rng = rand::thread_rng();
216
+ // let data_arr = generate_date32_array(&mut rng);
217
+ // let batch_len = data_arr.len();
218
+ // let data = ColumnarValue::Array(Arc::new(data_arr) as ArrayRef);
219
+ // let patterns = ColumnarValue::Scalar(ScalarValue::Utf8(Some(
220
+ // pick_date_time_pattern(&mut rng),
221
+ // )));
222
+ //
223
+ // b.iter(|| {
224
+ // black_box(
225
+ // to_char()
226
+ // .invoke_with_args(ScalarFunctionArgs {
227
+ // args: vec![data.clone(), patterns.clone()],
228
+ // number_rows: batch_len,
229
+ // return_type: &DataType::Utf8,
230
+ // })
231
+ // .expect("to_char should work on valid values"),
232
+ // )
233
+ // })
234
+ // });
138
235
139
- b. iter ( || {
140
- black_box (
141
- to_char ( )
142
- . invoke_with_args ( ScalarFunctionArgs {
143
- args : vec ! [ data. clone( ) , pattern. clone( ) ] ,
144
- number_rows : 1 ,
145
- return_type : & DataType :: Utf8 ,
146
- } )
147
- . expect ( "to_char should work on valid values" ) ,
148
- )
149
- } )
150
- } ) ;
236
+ // c.bench_function("to_char_scalar_1000", |b| {
237
+ // let mut rng = rand::thread_rng();
238
+ // let timestamp = "2026-07-08T09:10:11"
239
+ // .parse::<NaiveDateTime>()
240
+ // .unwrap()
241
+ // .with_nanosecond(56789)
242
+ // .unwrap()
243
+ // .and_utc()
244
+ // .timestamp_nanos_opt()
245
+ // .unwrap();
246
+ // let data = ColumnarValue::Scalar(TimestampNanosecond(Some(timestamp), None));
247
+ // let pattern =
248
+ // ColumnarValue::Scalar(ScalarValue::Utf8(Some(pick_date_pattern(&mut rng))));
249
+ //
250
+ // b.iter(|| {
251
+ // black_box(
252
+ // to_char()
253
+ // .invoke_with_args(ScalarFunctionArgs {
254
+ // args: vec![data.clone(), pattern.clone()],
255
+ // number_rows: 1,
256
+ // return_type: &DataType::Utf8,
257
+ // })
258
+ // .expect("to_char should work on valid values"),
259
+ // )
260
+ // })
261
+ // });
151
262
}
152
263
153
264
criterion_group ! ( benches, criterion_benchmark) ;
0 commit comments