Skip to content

Commit 8d83cc1

Browse files
authored
chore: Refactor cast to use SparkCastOptions param (#1146)
* Refactor cast to use SparkCastOptions param
* update tests
* update benches
* update benches
* update benches
1 parent 1c6c7a9 commit 8d83cc1

File tree

6 files changed

+83
-158
lines changed

6 files changed

+83
-158
lines changed

native/core/src/execution/datafusion/planner.rs

Lines changed: 9 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -100,7 +100,8 @@ use datafusion_comet_proto::{
100100
};
101101
use datafusion_comet_spark_expr::{
102102
ArrayInsert, Cast, CreateNamedStruct, DateTruncExpr, GetArrayStructFields, GetStructField,
103-
HourExpr, IfExpr, ListExtract, MinuteExpr, RLike, SecondExpr, TimestampTruncExpr, ToJson,
103+
HourExpr, IfExpr, ListExtract, MinuteExpr, RLike, SecondExpr, SparkCastOptions,
104+
TimestampTruncExpr, ToJson,
104105
};
105106
use datafusion_common::scalar::ScalarStructBuilder;
106107
use datafusion_common::{
@@ -388,14 +389,11 @@ impl PhysicalPlanner {
388389
ExprStruct::Cast(expr) => {
389390
let child = self.create_expr(expr.child.as_ref().unwrap(), input_schema)?;
390391
let datatype = to_arrow_datatype(expr.datatype.as_ref().unwrap());
391-
let timezone = expr.timezone.clone();
392392
let eval_mode = from_protobuf_eval_mode(expr.eval_mode)?;
393393
Ok(Arc::new(Cast::new(
394394
child,
395395
datatype,
396-
eval_mode,
397-
timezone,
398-
expr.allow_incompat,
396+
SparkCastOptions::new(eval_mode, &expr.timezone, expr.allow_incompat),
399397
)))
400398
}
401399
ExprStruct::Hour(expr) => {
@@ -806,24 +804,21 @@ impl PhysicalPlanner {
806804
let data_type = return_type.map(to_arrow_datatype).unwrap();
807805
// For some Decimal128 operations, we need wider internal digits.
808806
// Cast left and right to Decimal256 and cast the result back to Decimal128
809-
let left = Arc::new(Cast::new_without_timezone(
807+
let left = Arc::new(Cast::new(
810808
left,
811809
DataType::Decimal256(p1, s1),
812-
EvalMode::Legacy,
813-
false,
810+
SparkCastOptions::new_without_timezone(EvalMode::Legacy, false),
814811
));
815-
let right = Arc::new(Cast::new_without_timezone(
812+
let right = Arc::new(Cast::new(
816813
right,
817814
DataType::Decimal256(p2, s2),
818-
EvalMode::Legacy,
819-
false,
815+
SparkCastOptions::new_without_timezone(EvalMode::Legacy, false),
820816
));
821817
let child = Arc::new(BinaryExpr::new(left, op, right));
822-
Ok(Arc::new(Cast::new_without_timezone(
818+
Ok(Arc::new(Cast::new(
823819
child,
824820
data_type,
825-
EvalMode::Legacy,
826-
false,
821+
SparkCastOptions::new_without_timezone(EvalMode::Legacy, false),
827822
)))
828823
}
829824
(

native/spark-expr/benches/cast_from_string.rs

Lines changed: 6 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -18,36 +18,18 @@
1818
use arrow_array::{builder::StringBuilder, RecordBatch};
1919
use arrow_schema::{DataType, Field, Schema};
2020
use criterion::{criterion_group, criterion_main, Criterion};
21-
use datafusion_comet_spark_expr::{Cast, EvalMode};
21+
use datafusion_comet_spark_expr::{Cast, EvalMode, SparkCastOptions};
2222
use datafusion_physical_expr::{expressions::Column, PhysicalExpr};
2323
use std::sync::Arc;
2424

2525
fn criterion_benchmark(c: &mut Criterion) {
2626
let batch = create_utf8_batch();
2727
let expr = Arc::new(Column::new("a", 0));
28-
let timezone = "".to_string();
29-
let cast_string_to_i8 = Cast::new(
30-
expr.clone(),
31-
DataType::Int8,
32-
EvalMode::Legacy,
33-
timezone.clone(),
34-
false,
35-
);
36-
let cast_string_to_i16 = Cast::new(
37-
expr.clone(),
38-
DataType::Int16,
39-
EvalMode::Legacy,
40-
timezone.clone(),
41-
false,
42-
);
43-
let cast_string_to_i32 = Cast::new(
44-
expr.clone(),
45-
DataType::Int32,
46-
EvalMode::Legacy,
47-
timezone.clone(),
48-
false,
49-
);
50-
let cast_string_to_i64 = Cast::new(expr, DataType::Int64, EvalMode::Legacy, timezone, false);
28+
let spark_cast_options = SparkCastOptions::new(EvalMode::Legacy, "", false);
29+
let cast_string_to_i8 = Cast::new(expr.clone(), DataType::Int8, spark_cast_options.clone());
30+
let cast_string_to_i16 = Cast::new(expr.clone(), DataType::Int16, spark_cast_options.clone());
31+
let cast_string_to_i32 = Cast::new(expr.clone(), DataType::Int32, spark_cast_options.clone());
32+
let cast_string_to_i64 = Cast::new(expr, DataType::Int64, spark_cast_options);
5133

5234
let mut group = c.benchmark_group("cast_string_to_int");
5335
group.bench_function("cast_string_to_i8", |b| {

native/spark-expr/benches/cast_numeric.rs

Lines changed: 5 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -18,29 +18,17 @@
1818
use arrow_array::{builder::Int32Builder, RecordBatch};
1919
use arrow_schema::{DataType, Field, Schema};
2020
use criterion::{criterion_group, criterion_main, Criterion};
21-
use datafusion_comet_spark_expr::{Cast, EvalMode};
21+
use datafusion_comet_spark_expr::{Cast, EvalMode, SparkCastOptions};
2222
use datafusion_physical_expr::{expressions::Column, PhysicalExpr};
2323
use std::sync::Arc;
2424

2525
fn criterion_benchmark(c: &mut Criterion) {
2626
let batch = create_int32_batch();
2727
let expr = Arc::new(Column::new("a", 0));
28-
let timezone = "".to_string();
29-
let cast_i32_to_i8 = Cast::new(
30-
expr.clone(),
31-
DataType::Int8,
32-
EvalMode::Legacy,
33-
timezone.clone(),
34-
false,
35-
);
36-
let cast_i32_to_i16 = Cast::new(
37-
expr.clone(),
38-
DataType::Int16,
39-
EvalMode::Legacy,
40-
timezone.clone(),
41-
false,
42-
);
43-
let cast_i32_to_i64 = Cast::new(expr, DataType::Int64, EvalMode::Legacy, timezone, false);
28+
let spark_cast_options = SparkCastOptions::new_without_timezone(EvalMode::Legacy, false);
29+
let cast_i32_to_i8 = Cast::new(expr.clone(), DataType::Int8, spark_cast_options.clone());
30+
let cast_i32_to_i16 = Cast::new(expr.clone(), DataType::Int16, spark_cast_options.clone());
31+
let cast_i32_to_i64 = Cast::new(expr, DataType::Int64, spark_cast_options);
4432

4533
let mut group = c.benchmark_group("cast_int_to_int");
4634
group.bench_function("cast_i32_to_i8", |b| {

0 commit comments

Comments
 (0)