Skip to content

Commit

Permalink
implement percentile_xsection tests
Browse files Browse the repository at this point in the history
  • Loading branch information
0xfdf committed Aug 5, 2024
1 parent f343b70 commit 4581917
Show file tree
Hide file tree
Showing 3 changed files with 176 additions and 32 deletions.
14 changes: 13 additions & 1 deletion toraniko/math.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def winsorize_group(group: pl.DataFrame) -> pl.DataFrame:
return grouped


def xsection_percentiles(
def percentiles_xsection(
target_col: str,
over_col: str,
lower_pct: float,
Expand Down Expand Up @@ -182,5 +182,17 @@ def exp_weights(window: int, half_life: int) -> np.ndarray:
-------
numpy array
"""
try:
assert isinstance(window, int)
if not window > 0:
raise ValueError("`window` must be a strictly positive integer")
except (AttributeError, AssertionError) as e:
raise TypeError("`window` must be an integer type") from e
try:
assert isinstance(half_life, int)
if not half_life > 0:
raise ValueError("`half_life` must be a strictly positive integer")
except (AttributeError, AssertionError) as e:
raise TypeError("`half_life` must be an integer type") from e
decay = np.log(2) / half_life
return np.exp(-decay * np.arange(window))[::-1]
40 changes: 10 additions & 30 deletions toraniko/styles.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from toraniko.math import (
exp_weights,
center_xsection,
xsection_percentiles,
percentiles_xsection,
winsorize_xsection,
)

Expand Down Expand Up @@ -47,9 +47,7 @@ def weighted_cumprod(values: np.ndarray) -> float:
df = (
returns_df.lazy()
.sort(by="date")
.with_columns(
pl.col("asset_returns").shift(lag).over("symbol").alias("asset_returns")
)
.with_columns(pl.col("asset_returns").shift(lag).over("symbol").alias("asset_returns"))
.with_columns(
pl.col("asset_returns")
.rolling_map(weighted_cumprod, window_size=trailing_days)
Expand All @@ -64,13 +62,9 @@ def weighted_cumprod(values: np.ndarray) -> float:
center_xsection("mom_score", "date", True).alias("mom_score"),
)
except AttributeError as e:
raise TypeError(
"`returns_df` must be a Polars DataFrame | LazyFrame, but it's missing attributes"
) from e
raise TypeError("`returns_df` must be a Polars DataFrame | LazyFrame, but it's missing attributes") from e
except pl.ColumnNotFoundError as e:
raise ValueError(
"`returns_df` must have 'date', 'symbol' and 'asset_returns' columns"
) from e
raise ValueError("`returns_df` must have 'date', 'symbol' and 'asset_returns' columns") from e


def factor_sze(
Expand Down Expand Up @@ -99,26 +93,16 @@ def factor_sze(
"symbol",
(center_xsection("sze_score", "date", True)).alias("sze_score"),
)
.with_columns(
xsection_percentiles(
"sze_score", "date", lower_decile, upper_decile, 0.0
).alias("sze_score")
)
.with_columns(percentiles_xsection("sze_score", "date", lower_decile, upper_decile, 0.0).alias("sze_score"))
.select("date", "symbol", "sze_score")
)
except AttributeError as e:
raise TypeError(
"`mkt_cap_df` must be a Polars DataFrame or LazyFrame, but it's missing attributes"
) from e
raise TypeError("`mkt_cap_df` must be a Polars DataFrame or LazyFrame, but it's missing attributes") from e
except pl.ColumnNotFoundError as e:
raise ValueError(
"`mkt_cap_df` must have 'date', 'symbol' and 'market_cap' columns"
) from e
raise ValueError("`mkt_cap_df` must have 'date', 'symbol' and 'market_cap' columns") from e


def factor_val(
value_df: pl.DataFrame | pl.LazyFrame, winsorize_features: float | None = None
) -> pl.LazyFrame:
def factor_val(value_df: pl.DataFrame | pl.LazyFrame, winsorize_features: float | None = None) -> pl.LazyFrame:
"""Estimate rolling symbol by symbol value factor scores using price ratios.
Parameters
Expand All @@ -132,9 +116,7 @@ def factor_val(
"""
try:
if winsorize_features is not None:
value_df = winsorize_xsection(
value_df, ("book_price", "sales_price", "cf_price"), "date"
)
value_df = winsorize_xsection(value_df, ("book_price", "sales_price", "cf_price"), "date")
return (
value_df.lazy()
.with_columns(
Expand All @@ -161,9 +143,7 @@ def factor_val(
)
)
except AttributeError as e:
raise TypeError(
"`value_df` must be a Polars DataFrame or LazyFrame, but it's missing attributes"
) from e
raise TypeError("`value_df` must be a Polars DataFrame or LazyFrame, but it's missing attributes") from e
except pl.ColumnNotFoundError as e:
raise ValueError(
"`value_df` must have 'date', 'symbol', 'book_price', 'sales_price' and 'fcf_price' columns"
Expand Down
154 changes: 153 additions & 1 deletion toraniko/tests/test_math.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,14 @@
import polars as pl
import numpy as np
from polars.testing import assert_frame_equal
from toraniko.math import center_xsection, norm_xsection, winsorize, winsorize_xsection
from toraniko.math import (
center_xsection,
exp_weights,
norm_xsection,
percentiles_xsection,
winsorize,
winsorize_xsection,
)


@pytest.fixture
Expand Down Expand Up @@ -632,3 +639,148 @@ def test_winsorize_xsection_with_nans(sample_df_with_nans):
)

assert_frame_equal(result, expected, check_exact=False)


###
# `percentiles_xsection`
###


def test_xsection_percentiles(sample_df):
    """
    Test basic functionality of `percentiles_xsection`.

    Applies the expression to `value1` over `group` with 0.25 and 0.75 as the
    percentile arguments, sorts by `group`, and compares the selected columns
    against a hard-coded expected frame.
    """
    result = sample_df.with_columns(percentiles_xsection("value1", "group", 0.25, 0.75).alias("result")).sort("group")

    expected = pl.DataFrame(
        {
            "group": ["A", "A", "A", "B", "B", "B", "C", "C", "C"],
            "value1": [1, 2, 10, 4, 5, 20, 7, 8, 30],
            "result": [1.0, 2.0, 10.0, 4.0, 5.0, 20.0, 7.0, 8.0, 30.0],
        }
    )

    # Use the module-level `assert_frame_equal` import, consistent with the other tests in this file.
    assert_frame_equal(result.select("group", "value1", "result"), expected)


def test_xsection_percentiles_with_nans(sample_df_with_nans):
    """
    Test `percentiles_xsection` with NaN values.

    The expected frame keeps NaN in `result` at the same positions where
    `value1` is NaN.
    """
    result = sample_df_with_nans.with_columns(percentiles_xsection("value1", "group", 0.25, 0.75).alias("result"))

    expected = pl.DataFrame(
        {
            "group": ["A", "A", "A", "B", "B", "B", "C", "C", "C"],
            "value1": [1.0, np.nan, 10.0, 4.0, 5.0, np.nan, 7.0, 8.0, 30.0],
            "result": [1.0, np.nan, 10.0, 4.0, 5.0, np.nan, 7.0, 8.0, 30.0],
        }
    )

    # Use the module-level `assert_frame_equal` import, consistent with the other tests in this file.
    assert_frame_equal(result.select("group", "value1", "result"), expected)


###
# `exp_weights`
###


def test_exp_weights_basic():
    """
    Check `exp_weights` for window=5, half_life=2 against precomputed weights.
    """
    weights = exp_weights(window=5, half_life=2)
    np.testing.assert_array_almost_equal(
        weights,
        np.array([0.25, 0.35355339, 0.5, 0.70710678, 1.0]),
        decimal=6,
    )


def test_exp_weights_window_1():
    """
    Check that a window of 1 produces the single-element array [1.0].
    """
    weights = exp_weights(window=1, half_life=2)
    np.testing.assert_array_almost_equal(weights, np.array([1.0]))


def test_exp_weights_half_life_1():
    """
    Check `exp_weights` for window=5, half_life=1: each weight is half of the next one.
    """
    weights = exp_weights(window=5, half_life=1)
    np.testing.assert_array_almost_equal(
        weights,
        np.array([0.0625, 0.125, 0.25, 0.5, 1.0]),
    )


def test_exp_weights_large_window():
    """
    Check length and end-point ordering of `exp_weights` for a window of 100.
    """
    weights = exp_weights(window=100, half_life=10)
    assert len(weights) == 100

    first, last = weights[0], weights[-1]
    # The final weight is expected to be exactly 1.0 and larger than the first.
    assert last == 1.0
    assert first < last


def test_exp_weights_decreasing():
    """
    Check that weights strictly increase from the first element to the last
    (i.e. they decrease when read from end to start).
    """
    weights = exp_weights(window=10, half_life=3)
    steps = np.diff(weights)
    assert np.all(steps > 0)


def test_exp_weights_half_life():
    """
    Check that a weight is half of the weight `half_life` positions later.
    """
    half_life = 5
    window = 20
    weights = exp_weights(window, half_life)

    # Pair indices (0, 5), (5, 10), (10, 15): each start index with the one a half-life later.
    starts = weights[:-half_life:half_life]
    shifted = weights[half_life::half_life]
    for earlier, later in zip(starts, shifted):
        assert np.isclose(earlier, 0.5 * later, rtol=1e-5)


def test_exp_weights_invalid_window():
    """
    Check that `exp_weights` rejects invalid `window` arguments.

    Non-positive integers must raise ValueError; non-integer values must raise TypeError.
    """
    # (window, half_life, expected exception)
    cases = (
        (0, 2, ValueError),
        (-1, 2, ValueError),
        ("window", 2, TypeError),
        (5.1, 3, TypeError),
    )
    for bad_window, half_life, expected_error in cases:
        with pytest.raises(expected_error):
            exp_weights(window=bad_window, half_life=half_life)


def test_exp_weights_invalid_half_life():
    """
    Check that `exp_weights` rejects invalid `half_life` arguments.

    Non-positive integers must raise ValueError; non-integer values must raise TypeError.
    """
    # (half_life, expected exception); `window` is a valid 5 throughout.
    cases = (
        (0, ValueError),
        (-1, ValueError),
        ("half_life", TypeError),
        (3.2, TypeError),
    )
    for bad_half_life, expected_error in cases:
        with pytest.raises(expected_error):
            exp_weights(window=5, half_life=bad_half_life)


def test_output():
    """
    Test `exp_weights` against a fixed, precomputed output for window=10, half_life=10.
    """
    result = exp_weights(10, 10)
    expected = np.array(
        [0.53588673, 0.57434918, 0.61557221, 0.65975396, 0.70710678, 0.75785828, 0.8122524, 0.87055056, 0.93303299, 1.0]
    )
    # `np.testing.assert_array_almost_equal` raises on mismatch and returns None,
    # so it is called directly rather than wrapped in `assert`.
    np.testing.assert_array_almost_equal(result, expected)

0 comments on commit 4581917

Please sign in to comment.