Skip to content

Commit f9d3133

Browse files
authored
Improve perfomance of reverse function (#14025)
* Improve perfomance of 'reverse' function Signed-off-by: Tai Le Manh <manhtai.lmt@gmail.com> * Apply sugestion change * Fix typo --------- Signed-off-by: Tai Le Manh <manhtai.lmt@gmail.com>
1 parent 80c828b commit f9d3133

File tree

3 files changed

+111
-9
lines changed

3 files changed

+111
-9
lines changed

datafusion/functions/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,11 @@ harness = false
204204
name = "strpos"
205205
required-features = ["unicode_expressions"]
206206

207+
[[bench]]
208+
harness = false
209+
name = "reverse"
210+
required-features = ["unicode_expressions"]
211+
207212
[[bench]]
208213
harness = false
209214
name = "trunc"
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
extern crate criterion;
19+
20+
use arrow::array::OffsetSizeTrait;
21+
use arrow::util::bench_util::{
22+
create_string_array_with_len, create_string_view_array_with_len,
23+
};
24+
use criterion::{black_box, criterion_group, criterion_main, Criterion};
25+
use datafusion_expr::ColumnarValue;
26+
use datafusion_functions::unicode;
27+
use std::sync::Arc;
28+
29+
fn create_args<O: OffsetSizeTrait>(
30+
size: usize,
31+
str_len: usize,
32+
force_view_types: bool,
33+
) -> Vec<ColumnarValue> {
34+
if force_view_types {
35+
let string_array =
36+
Arc::new(create_string_view_array_with_len(size, 0.1, str_len, false));
37+
38+
vec![ColumnarValue::Array(string_array)]
39+
} else {
40+
let string_array =
41+
Arc::new(create_string_array_with_len::<O>(size, 0.1, str_len));
42+
43+
vec![ColumnarValue::Array(string_array)]
44+
}
45+
}
46+
47+
fn criterion_benchmark(c: &mut Criterion) {
48+
let reverse = unicode::reverse();
49+
for size in [1024, 4096] {
50+
let str_len = 8;
51+
52+
let args = create_args::<i32>(size, str_len, true);
53+
c.bench_function(
54+
format!("reverse_string_view [size={}, str_len={}]", size, str_len).as_str(),
55+
|b| {
56+
b.iter(|| {
57+
// TODO use invoke_with_args
58+
black_box(reverse.invoke_batch(&args, str_len))
59+
})
60+
},
61+
);
62+
63+
let str_len = 32;
64+
65+
let args = create_args::<i32>(size, str_len, true);
66+
c.bench_function(
67+
format!("reverse_string_view [size={}, str_len={}]", size, str_len).as_str(),
68+
|b| {
69+
b.iter(|| {
70+
// TODO use invoke_with_args
71+
black_box(reverse.invoke_batch(&args, str_len))
72+
})
73+
},
74+
);
75+
76+
let args = create_args::<i32>(size, str_len, false);
77+
c.bench_function(
78+
format!("reverse_string [size={}, str_len={}]", size, str_len).as_str(),
79+
|b| {
80+
b.iter(|| {
81+
// TODO use invoke_with_args
82+
black_box(reverse.invoke_batch(&args, str_len))
83+
})
84+
},
85+
);
86+
}
87+
}
88+
89+
criterion_group!(benches, criterion_benchmark);
90+
criterion_main!(benches);

datafusion/functions/src/unicode/reverse.rs

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@ use std::sync::Arc;
2020

2121
use crate::utils::{make_scalar_function, utf8_to_str_type};
2222
use arrow::array::{
23-
Array, ArrayAccessor, ArrayIter, ArrayRef, AsArray, GenericStringArray,
24-
OffsetSizeTrait,
23+
Array, ArrayRef, AsArray, GenericStringBuilder, OffsetSizeTrait, StringArrayType,
2524
};
2625
use arrow::datatypes::DataType;
2726
use datafusion_common::{exec_err, Result};
@@ -105,8 +104,7 @@ impl ScalarUDFImpl for ReverseFunc {
105104
}
106105
}
107106

108-
/// Reverses the order of the characters in the string.
109-
/// reverse('abcde') = 'edcba'
107+
/// Reverses the order of the characters in the string `reverse('abcde') = 'edcba'`.
110108
/// The implementation uses UTF-8 code points as characters
111109
pub fn reverse<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
112110
if args[0].data_type() == &Utf8View {
@@ -116,14 +114,23 @@ pub fn reverse<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
116114
}
117115
}
118116

119-
fn reverse_impl<'a, T: OffsetSizeTrait, V: ArrayAccessor<Item = &'a str>>(
117+
fn reverse_impl<'a, T: OffsetSizeTrait, V: StringArrayType<'a>>(
120118
string_array: V,
121119
) -> Result<ArrayRef> {
122-
let result = ArrayIter::new(string_array)
123-
.map(|string| string.map(|string: &str| string.chars().rev().collect::<String>()))
124-
.collect::<GenericStringArray<T>>();
120+
let mut builder = GenericStringBuilder::<T>::with_capacity(string_array.len(), 1024);
121+
122+
let mut reversed = String::new();
123+
for string in string_array.iter() {
124+
if let Some(s) = string {
125+
reversed.extend(s.chars().rev());
126+
builder.append_value(&reversed);
127+
reversed.clear();
128+
} else {
129+
builder.append_null();
130+
}
131+
}
125132

126-
Ok(Arc::new(result) as ArrayRef)
133+
Ok(Arc::new(builder.finish()) as ArrayRef)
127134
}
128135

129136
#[cfg(test)]

0 commit comments

Comments
 (0)