Skip to content

Commit 01c5efc

Browse files
authored
Add sort_kernel benchmark for StringViewArray case (#7787)
# Which issue does this PR close? Add sort_kernel benchmark for StringViewArray case - Closes [#7758](#7758) # Rationale for this change Add sort_kernel benchmark for StringViewArray case # What changes are included in this PR? Add sort_kernel benchmark for StringViewArray case # Are these changes tested? Yes # Are there any user-facing changes? No
1 parent b269422 commit 01c5efc

File tree

2 files changed

+57
-0
lines changed

2 files changed

+57
-0
lines changed

arrow/benches/sort_kernel.rs

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,43 @@ fn add_benchmark(c: &mut Criterion) {
113113
b.iter(|| bench_sort_to_indices(&arr, None))
114114
});
115115

116+
// This will generate string view arrays with 2^12 elements, each with a fixed length of 10, and without nulls.
117+
let arr = create_string_view_array_with_fixed_len(2usize.pow(12), 0.0, 10);
118+
c.bench_function("sort string_view[10] to indices 2^12", |b| {
119+
b.iter(|| bench_sort_to_indices(&arr, None))
120+
});
121+
122+
// This will generate string view arrays with 2^12 elements, each with a fixed length of 10, and with 50% nulls.
123+
let arr = create_string_view_array_with_fixed_len(2usize.pow(12), 0.5, 10);
124+
c.bench_function("sort string_view[10] nulls to indices 2^12", |b| {
125+
b.iter(|| bench_sort_to_indices(&arr, None))
126+
});
127+
128+
// This will generate string view arrays with 2^12 elements, each with a length randomly chosen from 0 to max 400, and without nulls.
129+
let arr = create_string_view_array(2usize.pow(12), 0.0);
130+
c.bench_function("sort string_view[0-400] to indices 2^12", |b| {
131+
b.iter(|| bench_sort_to_indices(&arr, None))
132+
});
133+
134+
// This will generate string view arrays with 2^12 elements, each with a length randomly chosen from 0 to max 400, and with 50% nulls.
135+
let arr = create_string_view_array(2usize.pow(12), 0.5);
136+
c.bench_function("sort string_view[0-400] nulls to indices 2^12", |b| {
137+
b.iter(|| bench_sort_to_indices(&arr, None))
138+
});
139+
140+
// This will generate string view arrays with 2^12 elements, each with a length < 12 bytes (so the data is inlined), and without nulls.
141+
let arr = create_string_view_array_with_max_len(2usize.pow(12), 0.0, 12);
142+
c.bench_function("sort string_view_inlined[0-12] to indices 2^12", |b| {
143+
b.iter(|| bench_sort_to_indices(&arr, None))
144+
});
145+
146+
// This will generate string view arrays with 2^12 elements, each with a length < 12 bytes (so the data is inlined), and with 50% nulls.
147+
let arr = create_string_view_array_with_max_len(2usize.pow(12), 0.5, 12);
148+
c.bench_function(
149+
"sort string_view_inlined[0-12] nulls to indices 2^12",
150+
|b| b.iter(|| bench_sort_to_indices(&arr, None)),
151+
);
152+
116153
let arr = create_string_dict_array::<Int32Type>(2usize.pow(12), 0.0, 10);
117154
c.bench_function("sort string[10] dict to indices 2^12", |b| {
118155
b.iter(|| bench_sort_to_indices(&arr, None))

arrow/src/util/bench_util.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,26 @@ pub fn create_string_view_array_with_max_len(
297297
.collect()
298298
}
299299

300+
/// Creates a random (but fixed-seeded) [`StringViewArray`] with `size` elements in which
/// every non-null value is built from exactly `str_len` samples of `Alphanumeric`.
///
/// An element is null whenever a freshly drawn `f32` is below `null_density`; callers
/// in the benchmarks pass `0.0` for "no nulls" and `0.5` for roughly half nulls.
///
/// # Panics
///
/// Panics if the sampled bytes are not valid UTF-8 (the `String::from_utf8(..).unwrap()`
/// below); this is not expected for alphanumeric samples.
301+
pub fn create_string_view_array_with_fixed_len(
302+
size: usize,
303+
null_density: f32,
304+
str_len: usize,
305+
) -> StringViewArray {
306+
// One generator is shared by the null decision and the value sampling of every element.
let rng = &mut seedable_rng();
307+
(0..size)
308+
.map(|_| {
309+
// Decide per element whether it is null before sampling any string data.
if rng.random::<f32>() < null_density {
310+
None
311+
} else {
312+
// Unlike the random-length variants, always take exactly `str_len` samples.
let value = rng.sample_iter(&Alphanumeric).take(str_len).collect();
313+
let value = String::from_utf8(value).unwrap();
314+
Some(value)
315+
}
316+
})
317+
.collect()
318+
}
319+
300320
/// Creates a random (but fixed-seeded) array of a given size, null density and length
301321
pub fn create_string_view_array_with_len(
302322
size: usize,

0 commit comments

Comments
 (0)