-
-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add business_day_count function
- Loading branch information
1 parent
cc6c642
commit 999a70c
Showing
18 changed files
with
335 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
83 changes: 83 additions & 0 deletions
83
crates/polars-plan/src/dsl/function_expr/business/business_day_count.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
use polars_core::chunked_array::ops::arity::binary_elementwise_values; | ||
use polars_core::prelude::*; | ||
|
||
pub(super) fn business_day_count(s: &[Series]) -> PolarsResult<Series> { | ||
let start = &s[0]; | ||
let end = &s[1]; | ||
|
||
let start_dates = start.date()?; | ||
let end_dates = end.date()?; | ||
|
||
// TODO: support customising weekdays | ||
let week_mask: [bool; 7] = [true, true, true, true, true, false, false]; | ||
let n_weekdays = week_mask.iter().filter(|&x| *x).count() as i32; | ||
|
||
let out = match (start_dates.len(), end_dates.len()) { | ||
(_, 1) => { | ||
if let Some(end_date) = end_dates.get(0) { | ||
start_dates.apply_values(|start_date| { | ||
impl_business_day_count(start_date, end_date, &week_mask, n_weekdays) | ||
}) | ||
} else { | ||
Int32Chunked::full_null(start_dates.name(), start_dates.len()) | ||
} | ||
}, | ||
(1, _) => { | ||
if let Some(start_date) = start_dates.get(0) { | ||
end_dates.apply_values(|end_date| { | ||
impl_business_day_count(start_date, end_date, &week_mask, n_weekdays) | ||
}) | ||
} else { | ||
Int32Chunked::full_null(start_dates.name(), start_dates.len()) | ||
} | ||
}, | ||
_ => binary_elementwise_values(start_dates, end_dates, |start_date, end_date| { | ||
impl_business_day_count(start_date, end_date, &week_mask, n_weekdays) | ||
}), | ||
}; | ||
Ok(out.into_series()) | ||
} | ||
|
||
/// Ported from: | ||
/// https://github.com/numpy/numpy/blob/e59c074842e3f73483afa5ddef031e856b9fd313/numpy/_core/src/multiarray/datetime_busday.c#L355-L433 | ||
fn impl_business_day_count( | ||
mut start_date: i32, | ||
mut end_date: i32, | ||
weekmask: &[bool; 7], | ||
n_weekdays: i32, | ||
) -> i32 { | ||
let swapped = start_date > end_date; | ||
if swapped { | ||
(start_date, end_date) = (end_date, start_date); | ||
start_date += 1; | ||
end_date += 1; | ||
} | ||
|
||
let mut start_weekday = weekday(start_date) as usize; | ||
let diff = end_date - start_date; | ||
let whole_weeks = diff / 7; | ||
let mut count = 0; | ||
count += whole_weeks * n_weekdays; | ||
start_date += whole_weeks * 7; | ||
while start_date < end_date { | ||
if unsafe { *weekmask.get_unchecked(start_weekday - 1) } { | ||
count += 1; | ||
} | ||
start_date += 1; | ||
start_weekday += 1; | ||
if start_weekday > 7 { | ||
start_weekday = 1; | ||
} | ||
} | ||
if swapped { | ||
-count | ||
} else { | ||
count | ||
} | ||
} | ||
|
||
/// Maps a day number to a weekday in `1..=7`.
///
/// Day `4` maps to `1`, and the value advances by one per day, wrapping from
/// `7` back to `1`. If `x` counts days since 1970-01-01 (a Thursday), this
/// makes `1` Monday and `7` Sunday.
pub(crate) fn weekday(x: i32) -> i32 {
    // `rem_euclid` is never negative, so the shifted offset lands in 0..=6
    // even for days before the reference point.
    (x - 4).rem_euclid(7) + 1
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
use std::fmt::{Display, Formatter}; | ||
|
||
use polars_core::prelude::*; | ||
#[cfg(feature = "serde")] | ||
use serde::{Deserialize, Serialize}; | ||
|
||
use crate::dsl::SpecialEq; | ||
use crate::map_as_slice; | ||
use crate::prelude::SeriesUdf; | ||
|
||
#[cfg(feature = "dtype-date")] | ||
mod business_day_count; | ||
|
||
/// Business-calendar functions that can be used as a function expression.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum BusinessFunction {
    /// Count the business days between a start date and an end date.
    #[cfg(feature = "business")]
    BusinessDayCount,
}
|
||
impl Display for BusinessFunction { | ||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { | ||
use BusinessFunction::*; | ||
let s = match self { | ||
#[cfg(feature = "business")] | ||
&BusinessDayCount => "business_day_count", | ||
}; | ||
write!(f, "{s}") | ||
} | ||
} | ||
impl From<BusinessFunction> for SpecialEq<Arc<dyn SeriesUdf>> { | ||
fn from(func: BusinessFunction) -> Self { | ||
use BusinessFunction::*; | ||
match func { | ||
#[cfg(feature = "business")] | ||
BusinessDayCount => { | ||
map_as_slice!(business_day_count::business_day_count) | ||
}, | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
use super::*; | ||
|
||
/// Builds an expression that counts the business days between `start` and
/// `end`.
///
/// The two expressions are passed, in that order, as the inputs of a
/// `BusinessFunction::BusinessDayCount` function expression.
#[cfg(feature = "dtype-date")]
pub fn business_day_count(start: Expr, end: Expr) -> Expr {
    let options = FunctionOptions {
        allow_rename: true,
        ..Default::default()
    };

    Expr::Function {
        input: vec![start, end],
        function: FunctionExpr::Business(BusinessFunction::BusinessDayCount),
        options,
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
from __future__ import annotations | ||
|
||
import contextlib | ||
from typing import TYPE_CHECKING | ||
|
||
from polars._utils.parse_expr_input import parse_as_expression | ||
from polars._utils.wrap import wrap_expr | ||
|
||
with contextlib.suppress(ImportError): # Module not available when building docs | ||
import polars.polars as plr | ||
|
||
if TYPE_CHECKING: | ||
from datetime import date | ||
|
||
from polars import Expr | ||
from polars.type_aliases import IntoExprColumn | ||
|
||
|
||
def business_day_count(
    start: date | IntoExprColumn,
    end: date | IntoExprColumn,
) -> Expr:
    """
    Count the number of business days between `start` and `end`.

    By default, Saturday and Sunday are excluded. The ability to
    customise the week mask and holidays is not yet implemented.

    Parameters
    ----------
    start
        Start dates.
    end
        End dates.

    Returns
    -------
    Expr

    Examples
    --------
    >>> from datetime import date
    >>> df = pl.DataFrame(
    ...     {
    ...         "start": [date(2020, 1, 1), date(2020, 1, 2)],
    ...         "end": [date(2020, 1, 2), date(2020, 1, 10)],
    ...     }
    ... )
    >>> df.with_columns(
    ...     total_day_count=(pl.col("end") - pl.col("start")).dt.total_days(),
    ...     business_day_count=pl.business_day_count("start", "end"),
    ... )
    shape: (2, 4)
    ┌────────────┬────────────┬─────────────────┬────────────────────┐
    │ start      ┆ end        ┆ total_day_count ┆ business_day_count │
    │ ---        ┆ ---        ┆ ---             ┆ ---                │
    │ date       ┆ date       ┆ i64             ┆ i32                │
    ╞════════════╪════════════╪═════════════════╪════════════════════╡
    │ 2020-01-01 ┆ 2020-01-02 ┆ 1               ┆ 1                  │
    │ 2020-01-02 ┆ 2020-01-10 ┆ 8               ┆ 6                  │
    └────────────┴────────────┴─────────────────┴────────────────────┘

    Note how the two "count" columns differ due to the weekend (2020-01-04 - 2020-01-05)
    not being counted by `business_day_count`.
    """
    # Both inputs are parsed into expressions (start first, then end) before
    # being handed to the native implementation.
    return wrap_expr(
        plr.business_day_count(
            parse_as_expression(start),
            parse_as_expression(end),
        )
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
use polars::lazy::dsl; | ||
use pyo3::prelude::*; | ||
|
||
use crate::PyExpr; | ||
|
||
#[pyfunction] | ||
pub fn business_day_count(start: PyExpr, end: PyExpr) -> PyExpr { | ||
let start = start.inner; | ||
let end = end.inner; | ||
dsl::business_day_count(start, end).into() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.