Skip to content

Commit 1c07d91

Browse files
author
Vojtech Toman
committed
Optimize make_date (#9089)
* replace the expensive calculation of unix_days_from_ce with a constant
* do not use PrimitiveArray builder for the scalar case
1 parent 0302d65 commit 1c07d91

File tree

1 file changed

+52
-63
lines changed

1 file changed

+52
-63
lines changed

datafusion/physical-expr/src/datetime_expressions.rs

Lines changed: 52 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -303,44 +303,10 @@ pub fn make_date(args: &[ColumnarValue]) -> Result<ColumnarValue> {
303303
ColumnarValue::Array(a) => Some(a.len()),
304304
});
305305

306-
let is_scalar = len.is_none();
307-
let array_size = if is_scalar { 1 } else { len.unwrap() };
308-
309306
let years = args[0].cast_to(&DataType::Int32, None)?;
310307
let months = args[1].cast_to(&DataType::Int32, None)?;
311308
let days = args[2].cast_to(&DataType::Int32, None)?;
312309

313-
// since the epoch for the date32 datatype is the unix epoch
314-
// we need to subtract the unix epoch from the current date
315-
// note this can result in a negative value
316-
let unix_days_from_ce = NaiveDate::from_ymd_opt(1970, 1, 1)
317-
.unwrap()
318-
.num_days_from_ce();
319-
320-
let mut builder: PrimitiveBuilder<Date32Type> = PrimitiveArray::builder(array_size);
321-
322-
let construct_date_fn = |builder: &mut PrimitiveBuilder<Date32Type>,
323-
year: i32,
324-
month: i32,
325-
day: i32,
326-
unix_days_from_ce: i32|
327-
-> Result<()> {
328-
let Ok(m) = u32::try_from(month) else {
329-
return exec_err!("Month value '{month:?}' is out of range");
330-
};
331-
let Ok(d) = u32::try_from(day) else {
332-
return exec_err!("Day value '{day:?}' is out of range");
333-
};
334-
335-
let date = NaiveDate::from_ymd_opt(year, m, d);
336-
337-
match date {
338-
Some(d) => builder.append_value(d.num_days_from_ce() - unix_days_from_ce),
339-
None => return exec_err!("Unable to parse date from {year}, {month}, {day}"),
340-
};
341-
Ok(())
342-
};
343-
344310
let scalar_value_fn = |col: &ColumnarValue| -> Result<i32> {
345311
let ColumnarValue::Scalar(s) = col else {
346312
return exec_err!("Expected scalar value");
@@ -351,51 +317,74 @@ pub fn make_date(args: &[ColumnarValue]) -> Result<ColumnarValue> {
351317
Ok(*i)
352318
};
353319

354-
// For scalar only columns the operation is faster without using the PrimitiveArray
355-
if is_scalar {
356-
construct_date_fn(
357-
&mut builder,
358-
scalar_value_fn(&years)?,
359-
scalar_value_fn(&months)?,
360-
scalar_value_fn(&days)?,
361-
unix_days_from_ce,
362-
)?;
363-
} else {
364-
let to_primitive_array = |col: &ColumnarValue,
365-
scalar_count: usize|
366-
-> Result<PrimitiveArray<Int32Type>> {
320+
let value = if let Some(array_size) = len {
321+
let to_primitive_array_fn = |col: &ColumnarValue| -> PrimitiveArray<Int32Type> {
367322
match col {
368-
ColumnarValue::Array(a) => Ok(a.as_primitive::<Int32Type>().to_owned()),
323+
ColumnarValue::Array(a) => a.as_primitive::<Int32Type>().to_owned(),
369324
_ => {
370325
let v = scalar_value_fn(col).unwrap();
371-
Ok(PrimitiveArray::<Int32Type>::from_value(v, scalar_count))
326+
PrimitiveArray::<Int32Type>::from_value(v, array_size)
372327
}
373328
}
374329
};
375330

376-
let years = to_primitive_array(&years, array_size).unwrap();
377-
let months = to_primitive_array(&months, array_size).unwrap();
378-
let days = to_primitive_array(&days, array_size).unwrap();
331+
let years = to_primitive_array_fn(&years);
332+
let months = to_primitive_array_fn(&months);
333+
let days = to_primitive_array_fn(&days);
334+
335+
let mut builder: PrimitiveBuilder<Date32Type> = PrimitiveArray::builder(array_size);
379336
for i in 0..array_size {
380-
construct_date_fn(
381-
&mut builder,
337+
process_date(
382338
years.value(i),
383339
months.value(i),
384340
days.value(i),
385-
unix_days_from_ce,
341+
|days: i32| -> () { builder.append_value(days) },
386342
)?;
387343
}
388-
}
389344

390-
let arr = builder.finish();
345+
let arr = builder.finish();
391346

392-
if is_scalar {
393-
// If all inputs are scalar, keeps output as scalar
394-
Ok(ColumnarValue::Scalar(ScalarValue::Date32(Some(
395-
arr.value(0),
396-
))))
347+
ColumnarValue::Array(Arc::new(arr))
348+
} else {
349+
// For scalar only columns the operation is faster without using the PrimitiveArray.
350+
// Also, keep the output as scalar since all inputs are scalar.
351+
let mut value = 0;
352+
process_date(
353+
scalar_value_fn(&years)?,
354+
scalar_value_fn(&months)?,
355+
scalar_value_fn(&days)?,
356+
|days: i32| -> () { value = days },
357+
)?;
358+
359+
ColumnarValue::Scalar(ScalarValue::Date32(Some(value)))
360+
};
361+
362+
Ok(value)
363+
}
364+
365+
/// Validates a year/month/day triple and hands the resulting day count to a callback.
///
/// `month` and `day` are first converted to `u32`; a value that does not fit
/// (for an `i32` input, a negative one) yields an "out of range" error built by
/// `exec_err!`. The triple is then passed to `NaiveDate::from_ymd_opt`.
///
/// On success, `date_consumer_fn` is called once with
/// `date.num_days_from_ce() - UNIX_DAYS_FROM_CE`, i.e. the date expressed as days
/// relative to the unix epoch (negative for dates before it), and `Ok(())` is
/// returned. If the triple does not form a valid date, `date_consumer_fn` is not
/// called and an "Unable to parse date" error built by `exec_err!` is returned.
fn process_date(year: i32,
366+
month: i32,
367+
day: i32,
368+
mut date_consumer_fn: impl FnMut(i32) -> ()) -> Result<()> {
369+
// `NaiveDate::from_ymd_opt` is called with `u32` month/day below, so values
// that cannot be represented as `u32` are rejected up front.
let Ok(m) = u32::try_from(month) else {
370+
return exec_err!("Month value '{month:?}' is out of range");
371+
};
372+
let Ok(d) = u32::try_from(day) else {
373+
return exec_err!("Day value '{day:?}' is out of range");
374+
};
375+
376+
// `from_ymd_opt` returns `None` for combinations that are not a valid date.
if let Some(date) = NaiveDate::from_ymd_opt(year, m, d) {
377+
// The number of days until the start of the unix epoch in the proleptic Gregorian calendar
378+
// (with January 1, Year 1 (CE) as day 1). See [Datelike::num_days_from_ce].
379+
const UNIX_DAYS_FROM_CE: i32 = 719_163;
380+
381+
// since the epoch for the date32 datatype is the unix epoch
382+
// we need to subtract the unix epoch from the current date
383+
// note this can result in a negative value
384+
date_consumer_fn(date.num_days_from_ce() - UNIX_DAYS_FROM_CE);
385+
Ok(())
397386
} else {
398-
Ok(ColumnarValue::Array(Arc::new(arr)))
387+
exec_err!("Unable to parse date from {year}, {month}, {day}")
399388
}
400389
}
401390

0 commit comments

Comments
 (0)