From bc6b2cda3efd2b0c6c48f932ce19da46456bcbd5 Mon Sep 17 00:00:00 2001
From: Andy Grove
Date: Thu, 9 May 2024 12:04:55 -0600
Subject: [PATCH] chore: Add criterion benchmarks for casting between integer types (#401)

* Add cargo bench for casting between int types

* Update core/benches/cast_from_string.rs

Co-authored-by: comphead

---------

Co-authored-by: comphead
---
 core/Cargo.toml                               |  6 +-
 core/benches/{cast.rs => cast_from_string.rs} | 34 ++++----
 core/benches/cast_numeric.rs                  | 79 +++++++++++++++++++
 3 files changed, 104 insertions(+), 15 deletions(-)
 rename core/benches/{cast.rs => cast_from_string.rs} (93%)
 create mode 100644 core/benches/cast_numeric.rs

diff --git a/core/Cargo.toml b/core/Cargo.toml
index cbca7f629..ac565680a 100644
--- a/core/Cargo.toml
+++ b/core/Cargo.toml
@@ -119,5 +119,9 @@ name = "row_columnar"
 harness = false
 
 [[bench]]
-name = "cast"
+name = "cast_from_string"
+harness = false
+
+[[bench]]
+name = "cast_numeric"
 harness = false
diff --git a/core/benches/cast.rs b/core/benches/cast_from_string.rs
similarity index 93%
rename from core/benches/cast.rs
rename to core/benches/cast_from_string.rs
index 281fe82e2..5bfaebf34 100644
--- a/core/benches/cast.rs
+++ b/core/benches/cast_from_string.rs
@@ -23,19 +23,7 @@ use datafusion_physical_expr::{expressions::Column, PhysicalExpr};
 use std::sync::Arc;
 
 fn criterion_benchmark(c: &mut Criterion) {
-    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, true)]));
-    let mut b = StringBuilder::new();
-    for i in 0..1000 {
-        if i % 10 == 0 {
-            b.append_null();
-        } else if i % 2 == 0 {
-            b.append_value(format!("{}", rand::random::<f64>()));
-        } else {
-            b.append_value(format!("{}", rand::random::<i64>()));
-        }
-    }
-    let array = b.finish();
-    let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(array)]).unwrap();
+    let batch = create_utf8_batch();
     let expr = Arc::new(Column::new("a", 0));
     let timezone = "".to_string();
     let cast_string_to_i8 = Cast::new(
@@ -58,7 +46,7 @@ fn criterion_benchmark(c: &mut Criterion) {
     );
     let cast_string_to_i64 = Cast::new(expr, DataType::Int64, EvalMode::Legacy, timezone);
 
-    let mut group = c.benchmark_group("cast");
+    let mut group = c.benchmark_group("cast_string_to_int");
     group.bench_function("cast_string_to_i8", |b| {
         b.iter(|| cast_string_to_i8.evaluate(&batch).unwrap());
     });
@@ -73,6 +61,24 @@ fn criterion_benchmark(c: &mut Criterion) {
     });
 }
 
+// Create UTF8 batch with strings representing ints, floats, nulls
+fn create_utf8_batch() -> RecordBatch {
+    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, true)]));
+    let mut b = StringBuilder::new();
+    for i in 0..1000 {
+        if i % 10 == 0 {
+            b.append_null();
+        } else if i % 2 == 0 {
+            b.append_value(format!("{}", rand::random::<f64>()));
+        } else {
+            b.append_value(format!("{}", rand::random::<i64>()));
+        }
+    }
+    let array = b.finish();
+    let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(array)]).unwrap();
+    batch
+}
+
 fn config() -> Criterion {
     Criterion::default()
 }
diff --git a/core/benches/cast_numeric.rs b/core/benches/cast_numeric.rs
new file mode 100644
index 000000000..398be6946
--- /dev/null
+++ b/core/benches/cast_numeric.rs
@@ -0,0 +1,79 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow_array::{builder::Int32Builder, RecordBatch};
+use arrow_schema::{DataType, Field, Schema};
+use comet::execution::datafusion::expressions::cast::{Cast, EvalMode};
+use criterion::{criterion_group, criterion_main, Criterion};
+use datafusion_physical_expr::{expressions::Column, PhysicalExpr};
+use std::sync::Arc;
+
+fn criterion_benchmark(c: &mut Criterion) {
+    let batch = create_int32_batch();
+    let expr = Arc::new(Column::new("a", 0));
+    let timezone = "".to_string();
+    let cast_i32_to_i8 = Cast::new(
+        expr.clone(),
+        DataType::Int8,
+        EvalMode::Legacy,
+        timezone.clone(),
+    );
+    let cast_i32_to_i16 = Cast::new(
+        expr.clone(),
+        DataType::Int16,
+        EvalMode::Legacy,
+        timezone.clone(),
+    );
+    let cast_i32_to_i64 = Cast::new(expr, DataType::Int64, EvalMode::Legacy, timezone);
+
+    let mut group = c.benchmark_group("cast_int_to_int");
+    group.bench_function("cast_i32_to_i8", |b| {
+        b.iter(|| cast_i32_to_i8.evaluate(&batch).unwrap());
+    });
+    group.bench_function("cast_i32_to_i16", |b| {
+        b.iter(|| cast_i32_to_i16.evaluate(&batch).unwrap());
+    });
+    group.bench_function("cast_i32_to_i64", |b| {
+        b.iter(|| cast_i32_to_i64.evaluate(&batch).unwrap());
+    });
+}
+
+fn create_int32_batch() -> RecordBatch {
+    let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, true)]));
+    let mut b = Int32Builder::new();
+    for i in 0..1000 {
+        if i % 10 == 0 {
+            b.append_null();
+        } else {
+            b.append_value(rand::random::<i32>());
+        }
+    }
+    let array = b.finish();
+    let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(array)]).unwrap();
+    batch
+}
+
+fn config() -> Criterion {
+    Criterion::default()
+}
+
+criterion_group! {
+    name = benches;
+    config = config();
+    targets = criterion_benchmark
+}
+criterion_main!(benches);