Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 9 additions & 17 deletions chainladder/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,24 +290,24 @@ def _get_date_axes(
end=origin_date.max(),
freq=origin_grain
).to_timestamp(how="s")

development_range: DatetimeIndex = pd.period_range(
start=development_date.min(),
end=development_date.max(),
freq=development_grain,
).to_timestamp(how="e")

# If the development is semi-annual, we need to adjust further because of "2Q-DEC".
if development_grain == "2Q-DEC":
if development_grain[:2] == "2Q":
from pandas.tseries.offsets import DateOffset

development_range += DateOffset(months=-3)

c = pd.DataFrame(
TriangleBase._cartesian_product(origin_range, development_range),
columns=["__origin__", "__development__"],
)

return c[c["__development__"] > c["__origin__"]]

@property
Expand Down Expand Up @@ -407,25 +407,17 @@ def _get_grain(
Set to False if you want to treat December as period end. Set
to True if you want it inferred from the data.
"""
months: np.ndarray = dates.dt.month.unique()
months: np.ndarray = (dates.dt.year * 12 + dates.dt.month).unique()
diffs: np.ndarray = np.diff(np.sort(months))
if len(dates.unique()) == 1:
if np.all(np.mod(diffs,12) == 0):
grain = (
"Y"
if version.Version(pd.__version__) >= version.Version("2.2.0")
else "A"
)

elif len(months) == 1:
grain = (
"Y"
if version.Version(pd.__version__) >= version.Version("2.2.0")
else "A"
)

elif np.all(diffs == 6):
elif np.all(np.mod(diffs,6) == 0):
grain = "2Q"
elif np.all(diffs == 3):
elif np.all(np.mod(diffs,3) == 0):
grain = "Q"
else:
grain = "M"
Expand Down
13 changes: 12 additions & 1 deletion chainladder/core/tests/test_correlation.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,20 @@
import chainladder as cl
import pytest

# Shared sample data set, loaded once at import time and reused by the tests in this module.
raa = cl.load_sample("RAA")

def test_val_corr():
def test_val_corr_total_true():
    """Valuation correlation on the RAA sample with ``total=True`` is truthy."""
    outcome = raa.valuation_correlation(p_critical=0.5, total=True)
    assert outcome

def test_val_corr_total_false():
    """Valuation correlation on the RAA sample with ``total=False`` is truthy."""
    outcome = raa.valuation_correlation(p_critical=0.5, total=False)
    assert outcome

def test_dev_corr():
    """Development correlation on the RAA sample is truthy."""
    outcome = raa.development_correlation(p_critical=0.5)
    assert outcome

def test_dev_corr_sparse():
    """Development correlation is truthy after switching RAA to the 'sparse' backend."""
    sparse_raa = raa.set_backend('sparse')
    outcome = sparse_raa.development_correlation(p_critical=0.5)
    assert outcome

def test_validate_critical():
    """``valuation_correlation`` is expected to raise ValueError for ``p_critical=1.5``."""
    invalid_kwargs = {"p_critical": 1.5, "total": True}
    with pytest.raises(ValueError):
        raa.valuation_correlation(**invalid_kwargs)
34 changes: 34 additions & 0 deletions chainladder/core/tests/test_triangle.py
Original file line number Diff line number Diff line change
Expand Up @@ -841,3 +841,37 @@ def test_odd_quarter_end():
data_from_tri = triangle.to_frame(origin_as_datetime=True)
assert np.all(data_from_tri['2024Q2'].values == [100.,130.,160.,140.])
assert np.all(data_from_tri.index == pd.DatetimeIndex(data=["5/1/2023","8/1/2023","11/1/2023","2/1/2024"],freq = 'QS-NOV'))

def test_OXDX_triangle():
    """Check triangle construction across origin/development month spacings.

    ``x`` is the month spacing between the two origin dates and ``y`` the
    month spacing between development dates (``y <= x``).  For each pair a
    small two-origin data set is built and then shifted through twelve
    origin start months (``i``) and ``y`` development offsets (``j``); the
    resulting triangle must keep its shape, total and development lags.
    """
    spacings = [12, 6, 3, 1]
    first_orig = '2020-01-01'

    for x in spacings:
        for y in [s for s in spacings if s <= x]:
            width = int(x / y) + 1

            # Development dates for the first origin: `width` month-ends
            # spaced `y` months apart, pushed forward by `y - 1` months.
            month_ends = pd.date_range(
                start=first_orig, periods=width, freq=str(y) + 'ME'
            )
            dev_series = (month_ends + pd.DateOffset(months=y - 1)).to_series()

            # First origin repeated `width` times, plus one row for the
            # second origin `x` months later.
            first_origin_rows = pd.to_datetime([first_orig] * (width)).to_series()
            second_origin_row = (
                pd.to_datetime([first_orig]) + pd.DateOffset(months=x)
            ).to_series()
            second_dev_row = dev_series.iloc[[0]] + pd.DateOffset(months=x)

            tri_df = pd.DataFrame({
                'origin_date': pd.concat(
                    [first_origin_rows, second_origin_row]
                ).to_list(),
                'development_date': pd.concat(
                    [dev_series, second_dev_row]
                ).to_list(),
                'value': list(range(1, width + 2)),
            })

            for i in range(12):
                for j in range(y):
                    test_data = tri_df.copy()
                    test_data['origin_date'] += pd.DateOffset(months=i)
                    test_data['development_date'] += pd.DateOffset(months=i - j)

                    tri = cl.Triangle(
                        test_data,
                        origin='origin_date',
                        development='development_date',
                        columns='value',
                        cumulative=True,
                    )

                    expected_lags = [y - j + k * y for k in range(width)]
                    assert tri.shape == (1, 1, 2, width)
                    assert tri.sum().sum() == tri_df['value'].sum()
                    assert np.all(tri.development == expected_lags)
                    # there's a known bug with origin that displays incorrect year when origin doesn't start on 1/1
                    # if x == 12:
                    #     assert np.all(tri.origin == ['2020','2021'])
                    # elif x in [6,3]:
                    #     assert np.all(tri.origin.strftime('%Y') == pd.to_datetime(tri.odims).strftime('%Y'))
                    #     assert np.all(tri.origin.strftime('%q').values.astype(float) == np.ceil((pd.to_datetime(tri.odims).strftime('%m').values.astype(int) - 0.5) / 3))
16 changes: 6 additions & 10 deletions chainladder/core/triangle.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from chainladder.utils.sparse import sp
from chainladder.core.slice import VirtualColumns
from chainladder.core.correlation import DevelopmentCorrelation, ValuationCorrelation
from chainladder.utils.utility_functions import concat, num_to_nan, num_to_value
from chainladder.utils.utility_functions import concat, num_to_nan, num_to_value, to_period
from chainladder import options

try:
Expand Down Expand Up @@ -182,16 +182,12 @@ def __init__(

# Ensure that origin_date values represent the beginning of the period.
# i.e., 1990 means the start of 1990.
origin_date: Series = origin_date.dt.to_period(
self.origin_grain
).dt.to_timestamp(how="s")

origin_date: Series = to_period(origin_date,self.origin_grain).dt.to_timestamp(how="s")

# Ensure that development_date values represent the end of the period.
# i.e., 1990 means the end of 1990 assuming annual development periods.
development_date: Series = development_date.dt.to_period(
self.development_grain
).dt.to_timestamp(how="e")

development_date: Series = to_period(development_date,self.development_grain).dt.to_timestamp(how="e")

# Aggregate dates to the origin/development grains.
data_agg: DataFrame = self._aggregate_data(
data=data,
Expand Down Expand Up @@ -227,7 +223,7 @@ def __init__(
self.vdims = np.array(columns)
self.odims, orig_idx = self._set_odims(data_agg, date_axes)
self.ddims, dev_idx = self._set_ddims(data_agg, date_axes)

# Set remaining triangle properties.
val_date: Timestamp = data_agg["__development__"].max()
val_date = val_date.compute() if hasattr(val_date, "compute") else val_date
Expand Down
5 changes: 5 additions & 0 deletions chainladder/utils/utility_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -637,6 +637,11 @@ def minimum(x1, x2):
def maximum(x1, x2):
    """Return ``x1.maximum(x2)``, delegating to the ``maximum`` method of ``x1``."""
    result = x1.maximum(x2)
    return result

def to_period(dateseries: pd.Series, freq: str) -> pd.Series:
    """Convert a datetime Series to periods of ``freq``, aligning ``2Q`` grains.

    For any frequency that does not start with ``"2Q"`` this is exactly
    ``dateseries.dt.to_period(freq)``.

    For ``"2Q"`` frequencies, a date whose two-quarter period would start in
    quarter 2 or 4 (as reported by the period's ``%q`` format) is first moved
    back three months, so every resulting period starts in quarter 1 or 3.

    Parameters
    ----------
    dateseries : pd.Series
        Series of datetime values (accessed through the ``.dt`` accessor).
    freq : str
        pandas period frequency string, e.g. ``"M"``, ``"Q-DEC"``, ``"2Q-DEC"``.

    Returns
    -------
    pd.Series
        Series of periods with frequency ``freq``.
    """
    periods = dateseries.dt.to_period(freq)
    if not freq.startswith("2Q"):
        return periods

    # Dates already landing in a period that starts in quarter 1 or 3 are kept.
    starts_on_half = periods.dt.strftime("%q").isin(["1", "3"])

    # Shift directly on the datetime64 values: this stays vectorised and keeps
    # the dtype, rather than round-tripping through an object Series of
    # ``datetime.date`` values.  Only the quarter matters for the conversion
    # below, so the retained time-of-day has no effect on the result.
    shifted = dateseries - pd.DateOffset(months=3)

    return dateseries.where(starts_on_half, shifted).dt.to_period(freq)

class PatsyFormula(BaseEstimator, TransformerMixin):
"""A sklearn-style Transformer for patsy formulas.
Expand Down