Skip to content

Commit

Permalink
Add StringView benchmark for regexp_is_match
Browse files Browse the repository at this point in the history
Signed-off-by: Tai Le Manh <manhtai.lmt@gmail.com>
  • Loading branch information
tlm365 committed Sep 11, 2024
1 parent 595d64c commit 514847f
Showing 1 changed file with 51 additions and 10 deletions.
61 changes: 51 additions & 10 deletions arrow/benches/comparison_kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,18 @@

#[macro_use]
extern crate criterion;
use arrow::util::test_util::seedable_rng;
use criterion::Criterion;

extern crate arrow;

use std::time::Duration;

use arrow::compute::kernels::cmp::*;
use arrow::util::bench_util::*;
use arrow::util::test_util::seedable_rng;
use arrow::{array::*, datatypes::Float32Type, datatypes::Int32Type};
use arrow_buffer::IntervalMonthDayNano;
use arrow_string::like::*;
use arrow_string::regexp::regexp_is_match_utf8_scalar;
use criterion::{Criterion, SamplingMode};
use rand::rngs::StdRng;
use rand::Rng;

Expand All @@ -53,6 +54,15 @@ fn bench_nilike_utf8_scalar(arr_a: &StringArray, value_b: &str) {
nilike(arr_a, &StringArray::new_scalar(value_b)).unwrap();
}

/// Benchmark body for `regexp_is_match_utf8_scalar` on a [`StringViewArray`].
///
/// Both the array and the pattern are routed through `criterion::black_box`
/// before the call so the optimizer cannot treat them as known constants.
/// `None` is passed as the third argument, and the result is unwrapped, so an
/// `Err` from the kernel panics the benchmark.
fn bench_regexp_is_match_utf8view_scalar(arr_a: &StringViewArray, value_b: &str) {
    let haystack = criterion::black_box(arr_a);
    let pattern = criterion::black_box(value_b);
    regexp_is_match_utf8_scalar(haystack, pattern, None).unwrap();
}

fn bench_regexp_is_match_utf8_scalar(arr_a: &StringArray, value_b: &str) {
regexp_is_match_utf8_scalar(
criterion::black_box(arr_a),
Expand All @@ -78,6 +88,7 @@ fn add_benchmark(c: &mut Criterion) {
let arr_month_day_nano_b = create_month_day_nano_array_with_seed(SIZE, 0.0, 43);

let arr_string = create_string_array::<i32>(SIZE, 0.0);
let arr_string_view = create_string_view_array(SIZE, 0.0);

let scalar = Float32Array::from(vec![1.0]);

Expand Down Expand Up @@ -322,35 +333,65 @@ fn add_benchmark(c: &mut Criterion) {
});

// StringArray: NOT ILIKE benchmarks, plus regexp_is_match benchmarks for StringArray and StringViewArray
let mut group = c.benchmark_group("StringArray: NOT ILIKE benchmarks".to_string());
group.sampling_mode(SamplingMode::Flat);
group.sample_size(60);
group.measurement_time(Duration::from_secs(6));

c.bench_function("nilike_utf8 scalar equals", |b| {
group.bench_function("nilike_utf8 scalar equals", |b| {
b.iter(|| bench_nilike_utf8_scalar(&arr_string, "xxXX"))
});

c.bench_function("nilike_utf8 scalar contains", |b| {
group.bench_function("nilike_utf8 scalar contains", |b| {
b.iter(|| bench_nilike_utf8_scalar(&arr_string, "%xxXX%"))
});

c.bench_function("nilike_utf8 scalar ends with", |b| {
group.bench_function("nilike_utf8 scalar ends with", |b| {
b.iter(|| bench_nilike_utf8_scalar(&arr_string, "%xXXx"))
});

c.bench_function("nilike_utf8 scalar starts with", |b| {
group.bench_function("nilike_utf8 scalar starts with", |b| {
b.iter(|| bench_nilike_utf8_scalar(&arr_string, "XXXx%"))
});

c.bench_function("nilike_utf8 scalar complex", |b| {
group.bench_function("nilike_utf8 scalar complex", |b| {
b.iter(|| bench_nilike_utf8_scalar(&arr_string, "%xx_xX%xXX"))
});

c.bench_function("regexp_matches_utf8 scalar starts with", |b| {
group.bench_function("regexp_matches_utf8 scalar starts with", |b| {
b.iter(|| bench_regexp_is_match_utf8_scalar(&arr_string, "^xx"))
});

c.bench_function("regexp_matches_utf8 scalar ends with", |b| {
group.bench_function("regexp_matches_utf8 scalar contains", |b| {
b.iter(|| bench_regexp_is_match_utf8_scalar(&arr_string, ".*xx.*"))
});

group.bench_function("regexp_matches_utf8 scalar ends with", |b| {
b.iter(|| bench_regexp_is_match_utf8_scalar(&arr_string, "xx$"))
});

group.bench_function("regexp_matches_utf8 scalar complex", |b| {
b.iter(|| bench_regexp_is_match_utf8_scalar(&arr_string, ".*xx.xX.*xXX"))
});

group.bench_function("regexp_matches_utf8view scalar starts with", |b| {
b.iter(|| bench_regexp_is_match_utf8view_scalar(&arr_string_view, "^xx"))
});

group.bench_function("regexp_matches_utf8view scalar contains", |b| {
b.iter(|| bench_regexp_is_match_utf8view_scalar(&arr_string_view, ".*xx.*"))
});

group.bench_function("regexp_matches_utf8view scalar ends with", |b| {
b.iter(|| bench_regexp_is_match_utf8view_scalar(&arr_string_view, "xx$"))
});

group.bench_function("regexp_matches_utf8view scalar complex", |b| {
b.iter(|| bench_regexp_is_match_utf8view_scalar(&arr_string_view, ".*xx.xX.*xXX"))
});

group.finish();

// DictionaryArray benchmarks

let strings = create_string_array::<i32>(20, 0.);
Expand Down

0 comments on commit 514847f

Please sign in to comment.