Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add business_day_count function #15512

Merged
merged 3 commits into from
Apr 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crates/polars-lazy/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ is_between = ["polars-plan/is_between"]
is_unique = ["polars-plan/is_unique"]
cross_join = ["polars-plan/cross_join", "polars-pipe?/cross_join", "polars-ops/cross_join"]
asof_join = ["polars-plan/asof_join", "polars-time", "polars-ops/asof_join"]
business = ["polars-plan/business"]
concat_str = ["polars-plan/concat_str"]
range = ["polars-plan/range"]
mode = ["polars-plan/mode"]
Expand Down
1 change: 1 addition & 0 deletions crates/polars-ops/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ is_unique = []
unique_counts = []
is_between = []
approx_unique = []
business = ["dtype-date"]
fused = []
cutqcut = ["dtype-categorical", "dtype-struct"]
rle = ["dtype-struct"]
Expand Down
97 changes: 97 additions & 0 deletions crates/polars-ops/src/series/ops/business.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
use polars_core::prelude::arity::binary_elementwise_values;
use polars_core::prelude::*;

/// Count the number of business days between `start` and `end`, excluding `end`.
///
/// Monday through Friday are treated as business days; the week mask is not yet
/// configurable. If either input has length 1 it is broadcast against the other,
/// and a null in the broadcast input yields an all-null result. The counting
/// itself, including the negative result when a start date lies after its end
/// date, is done by `business_day_count_impl`.
///
/// The result holds `i32` counts and is always named after `start`, regardless of
/// which input was broadcast.
///
/// # Errors
///
/// Propagates the error from `Series::date` (presumably raised when an input is
/// not of `Date` dtype).
//
// NOTE(review): when neither input has length 1, the lengths are not validated
// before being handed to `binary_elementwise_values` — confirm how a mismatch is
// handled there and whether an explicit check is needed.
pub fn business_day_count(start: &Series, end: &Series) -> PolarsResult<Series> {
    let start_dates = start.date()?;
    let end_dates = end.date()?;

    // TODO: support customising weekdays
    let week_mask: [bool; 7] = [true, true, true, true, true, false, false];
    let n_business_days_in_week_mask =
        week_mask.iter().filter(|&&is_business_day| is_business_day).count() as i32;

    // Single place that binds the mask arguments, shared by all three branches.
    let count_between = |start_date: i32, end_date: i32| {
        business_day_count_impl(
            start_date,
            end_date,
            &week_mask,
            n_business_days_in_week_mask,
        )
    };

    let mut out = match (start_dates.len(), end_dates.len()) {
        // Scalar `end`: broadcast it over every start date.
        (_, 1) => match end_dates.get(0) {
            Some(end_date) => {
                start_dates.apply_values(|start_date| count_between(start_date, end_date))
            },
            None => Int32Chunked::full_null(start_dates.name(), start_dates.len()),
        },
        // Scalar `start`: broadcast it over every end date.
        (1, _) => match start_dates.get(0) {
            Some(start_date) => {
                end_dates.apply_values(|end_date| count_between(start_date, end_date))
            },
            None => Int32Chunked::full_null(start_dates.name(), end_dates.len()),
        },
        _ => binary_elementwise_values(start_dates, end_dates, |start_date, end_date| {
            count_between(start_date, end_date)
        }),
    };

    // The broadcast-`start` branch derives its output from `end_dates` and would
    // otherwise carry `end`'s name; make the output name independent of the branch.
    out.rename(start_dates.name());
    Ok(out.into_series())
}

/// Count the business days in the half-open range `[start_date, end_date)`.
///
/// `week_mask[i]` says whether the weekday with index `i` (as returned by
/// `weekday`, 0 = Monday) is a business day, and `n_business_days_in_week_mask`
/// must be the number of `true` entries in it.
///
/// When `start_date > end_date` the two are swapped and both shifted forward by
/// one day, so the days counted are `(end_date, start_date]`, and the result is
/// negated.
///
/// Ported from:
/// https://github.com/numpy/numpy/blob/e59c074842e3f73483afa5ddef031e856b9fd313/numpy/_core/src/multiarray/datetime_busday.c#L355-L433
fn business_day_count_impl(
    mut start_date: i32,
    mut end_date: i32,
    week_mask: &[bool; 7],
    n_business_days_in_week_mask: i32,
) -> i32 {
    let swapped = start_date > end_date;
    if swapped {
        (start_date, end_date) = (end_date + 1, start_date + 1);
    }

    let first_weekday = weekday(start_date);
    let span = end_date - start_date;

    // Every whole week contributes the same number of business days, and skipping
    // a multiple of 7 days leaves the weekday unchanged.
    let whole_weeks = span / 7;
    // At most 6 days remain. `rem_euclid` keeps this in 0..7 even if `span`
    // wrapped around for extreme inputs, so the walk below is always bounded.
    let leftover_days = span.rem_euclid(7) as usize;

    // Walk the leftover days. The index is reduced modulo 7, so it is always in
    // bounds for the 7-element mask and no unchecked access is needed.
    let partial_week = (0..leftover_days)
        .filter(|&offset| week_mask[(first_weekday + offset) % 7])
        .count() as i32;

    let count = whole_weeks * n_business_days_in_week_mask + partial_week;
    if swapped {
        -count
    } else {
        count
    }
}

/// Return the weekday index of day number `x`: 0 (Monday) through 6 (Sunday).
///
/// Day 4 maps to Monday, which matches day numbers counted from 1970-01-01
/// (a Thursday).
fn weekday(x: i32) -> usize {
    // Widen before subtracting so `x - 4` cannot overflow near `i32::MIN`.
    // `rem_euclid` is never negative, so the result is always in 0..7.
    (i64::from(x) - 4).rem_euclid(7) as usize
}
4 changes: 4 additions & 0 deletions crates/polars-ops/src/series/ops/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ mod approx_algo;
#[cfg(feature = "approx_unique")]
mod approx_unique;
mod arg_min_max;
#[cfg(feature = "business")]
mod business;
mod clip;
#[cfg(feature = "cum_agg")]
mod cum_agg;
Expand Down Expand Up @@ -65,6 +67,8 @@ pub use approx_algo::*;
#[cfg(feature = "approx_unique")]
pub use approx_unique::*;
pub use arg_min_max::ArgAgg;
#[cfg(feature = "business")]
pub use business::*;
pub use clip::*;
#[cfg(feature = "cum_agg")]
pub use cum_agg::*;
Expand Down
2 changes: 2 additions & 0 deletions crates/polars-plan/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ is_between = ["polars-ops/is_between"]
cross_join = ["polars-ops/cross_join"]
asof_join = ["polars-time", "polars-ops/asof_join"]
concat_str = []
business = ["polars-ops/business"]
range = []
mode = ["polars-ops/mode"]
cum_agg = ["polars-ops/cum_agg"]
Expand Down Expand Up @@ -252,6 +253,7 @@ features = [
"ciborium",
"dtype-decimal",
"arg_where",
"business",
"range",
"meta",
"hive_partitions",
Expand Down
45 changes: 45 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/business.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
use std::fmt::{Display, Formatter};

use polars_core::prelude::*;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

use crate::dsl::SpecialEq;
use crate::map_as_slice;
use crate::prelude::SeriesUdf;

/// Business-calendar functions that can be wrapped in a `FunctionExpr`.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[derive(Clone, PartialEq, Debug, Eq, Hash)]
pub enum BusinessFunction {
    /// Count the business days between two inputs; displayed as `business_day_count`.
    #[cfg(feature = "business")]
    BusinessDayCount,
}

impl Display for BusinessFunction {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
use BusinessFunction::*;
let s = match self {
#[cfg(feature = "business")]
&BusinessDayCount => "business_day_count",
};
write!(f, "{s}")
}
}
impl From<BusinessFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
fn from(func: BusinessFunction) -> Self {
use BusinessFunction::*;
match func {
#[cfg(feature = "business")]
BusinessDayCount => {
map_as_slice!(business_day_count)
},
}
}
}

/// Slice adapter: forwards `s[0]` (start) and `s[1]` (end) to the
/// `polars_ops` implementation of `business_day_count`.
///
/// Panics if `s` holds fewer than two series.
#[cfg(feature = "business")]
pub(super) fn business_day_count(s: &[Series]) -> PolarsResult<Series> {
    polars_ops::prelude::business_day_count(&s[0], &s[1])
}
12 changes: 12 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ mod array;
mod binary;
mod boolean;
mod bounds;
#[cfg(feature = "business")]
mod business;
#[cfg(feature = "dtype-categorical")]
mod cat;
#[cfg(feature = "round_series")]
Expand Down Expand Up @@ -81,6 +83,8 @@ use serde::{Deserialize, Serialize};

pub(crate) use self::binary::BinaryFunction;
pub use self::boolean::BooleanFunction;
#[cfg(feature = "business")]
pub(super) use self::business::BusinessFunction;
#[cfg(feature = "dtype-categorical")]
pub(crate) use self::cat::CategoricalFunction;
#[cfg(feature = "temporal")]
Expand Down Expand Up @@ -117,6 +121,8 @@ pub enum FunctionExpr {

// Other expressions
Boolean(BooleanFunction),
#[cfg(feature = "business")]
Business(BusinessFunction),
#[cfg(feature = "abs")]
Abs,
Negate,
Expand Down Expand Up @@ -349,6 +355,8 @@ impl Hash for FunctionExpr {

// Other expressions
Boolean(f) => f.hash(state),
#[cfg(feature = "business")]
Business(f) => f.hash(state),
Pow(f) => f.hash(state),
#[cfg(feature = "search_sorted")]
SearchSorted(f) => f.hash(state),
Expand Down Expand Up @@ -557,6 +565,8 @@ impl Display for FunctionExpr {

// Other expressions
Boolean(func) => return write!(f, "{func}"),
#[cfg(feature = "business")]
Business(func) => return write!(f, "{func}"),
#[cfg(feature = "abs")]
Abs => "abs",
Negate => "negate",
Expand Down Expand Up @@ -815,6 +825,8 @@ impl From<FunctionExpr> for SpecialEq<Arc<dyn SeriesUdf>> {

// Other expressions
Boolean(func) => func.into(),
#[cfg(feature = "business")]
Business(func) => func.into(),
#[cfg(feature = "abs")]
Abs => map!(abs::abs),
Negate => map!(dispatch::negate),
Expand Down
2 changes: 2 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ impl FunctionExpr {

// Other expressions
Boolean(func) => func.get_field(mapper),
#[cfg(feature = "business")]
Business(_) => mapper.with_dtype(DataType::Int32),
#[cfg(feature = "abs")]
Abs => mapper.with_same_dtype(),
Negate => mapper.with_same_dtype(),
Expand Down
15 changes: 15 additions & 0 deletions crates/polars-plan/src/dsl/functions/business.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
use super::*;

/// Build an expression that applies `BusinessFunction::BusinessDayCount` to
/// `start` and `end`, in that order.
#[cfg(feature = "dtype-date")]
pub fn business_day_count(start: Expr, end: Expr) -> Expr {
    let options = FunctionOptions {
        allow_rename: true,
        ..Default::default()
    };

    Expr::Function {
        input: vec![start, end],
        function: FunctionExpr::Business(BusinessFunction::BusinessDayCount),
        options,
    }
}
4 changes: 4 additions & 0 deletions crates/polars-plan/src/dsl/functions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
//!
//! Functions on expressions that might be useful.
mod arity;
#[cfg(feature = "business")]
mod business;
#[cfg(feature = "dtype-struct")]
mod coerce;
mod concat;
Expand All @@ -18,6 +20,8 @@ mod syntactic_sugar;
mod temporal;

pub use arity::*;
#[cfg(all(feature = "business", feature = "dtype-date"))]
pub use business::*;
#[cfg(feature = "dtype-struct")]
pub use coerce::*;
pub use concat::*;
Expand Down
1 change: 1 addition & 0 deletions crates/polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ array_any_all = ["polars-lazy?/array_any_all", "dtype-array"]
asof_join = ["polars-lazy?/asof_join", "polars-ops/asof_join"]
bigidx = ["polars-core/bigidx", "polars-lazy?/bigidx", "polars-ops/big_idx"]
binary_encoding = ["polars-ops/binary_encoding", "polars-lazy?/binary_encoding", "polars-sql?/binary_encoding"]
business = ["polars-lazy?/business", "polars-ops/business"]
checked_arithmetic = ["polars-core/checked_arithmetic"]
chunked_ids = ["polars-ops?/chunked_ids"]
coalesce = ["polars-lazy?/coalesce"]
Expand Down
1 change: 1 addition & 0 deletions py-polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ features = [
"abs",
"approx_unique",
"arg_where",
"business",
"concat_str",
"cum_agg",
"cumulative_eval",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ These functions are available from the Polars module root and can be used as exp
arctan2d
arg_sort_by
arg_where
business_day_count
coalesce
concat_list
concat_str
Expand Down
2 changes: 2 additions & 0 deletions py-polars/polars/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@
arctan2d,
arg_sort_by,
arg_where,
business_day_count,
coalesce,
col,
collect_all,
Expand Down Expand Up @@ -330,6 +331,7 @@
# polars.functions
"align_frames",
"arg_where",
"business_day_count",
"concat",
"date_range",
"date_ranges",
Expand Down
2 changes: 2 additions & 0 deletions py-polars/polars/functions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from polars.functions.as_datatype import date_ as date
from polars.functions.as_datatype import datetime_ as datetime
from polars.functions.as_datatype import time_ as time
from polars.functions.business import business_day_count
from polars.functions.col import col
from polars.functions.eager import align_frames, concat
from polars.functions.lazy import (
Expand Down Expand Up @@ -124,6 +125,7 @@
"arctan2",
"arctan2d",
"arg_sort_by",
"business_day_count",
"coalesce",
"col",
"collect_all",
Expand Down
Loading
Loading