Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,17 @@ Deploy
~write_app
~write_docker

Monitor
==================

.. autosummary::
:toctree: reference/
:caption: Monitor

~compute_metrics
~pin_metrics
~plot_metrics

Advanced Usage
==================
.. toctree::
Expand Down
2 changes: 2 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,13 @@ install_requires =
requests
pins
rsconnect-python
plotly

[options.extras_require]
dev =
pytest
pytest-cov
pytest-snapshot
sphinx
sphinx-autodoc-typehints
sphinx-book-theme
Expand Down
3 changes: 2 additions & 1 deletion vetiver/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,5 @@
from .handlers.base import VetiverHandler # noqa
from .handlers.sklearn import SKLearnHandler # noqa
from .handlers.torch import TorchHandler # noqa
from .rsconnect import deploy_rsconnect
from .rsconnect import deploy_rsconnect # noqa
from .monitor import compute_metrics, pin_metrics, plot_metrics, _rolling_df # noqa
166 changes: 166 additions & 0 deletions vetiver/monitor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
import datetime
import pins
from pins.errors import PinsError
import plotly.express as px
import pandas as pd
from datetime import datetime, timedelta


def compute_metrics(
    data: pd.DataFrame,
    date_var: str,
    period: timedelta,
    metric_set: list,
    truth: str,
    estimate: str,
) -> pd.DataFrame:
    """
    Compute metrics for given time period

    The rows of `data` are sorted by `date_var` and split into consecutive
    windows of length `period`; every metric in `metric_set` is then
    evaluated on each non-empty window.

    Parameters
    ----------
    data : DataFrame
        Pandas dataframe
    date_var:
        Column in `data` containing dates
    period: datetime.timedelta
        Defining period to group by
    metric_set: list
        List of metrics to compute, that have the parameters `y_true` and `y_pred`
    truth:
        Column name for true results
    estimate:
        Column name for predicted results

    Returns
    -------
    DataFrame
        One row per (window, metric) pair with the columns `index` (first
        date observed in the window), `n` (number of rows in the window),
        `metric` (the metric's `__qualname__`) and `estimate` (the metric
        value). Empty when `data` has no rows.

    Example
    -------
    from datetime import timedelta
    import pandas as pd
    from sklearn import metrics
    rng = pd.date_range("1/1/2012", periods=10, freq="S")
    new = dict(x=range(len(rng)), y=range(len(rng)))
    df = pd.DataFrame(new, index=rng).reset_index()
    td = timedelta(seconds=2)
    metric_set = [metrics.mean_squared_error, metrics.mean_absolute_error]
    compute_metrics(df, "index", td, metric_set=metric_set, truth="x", estimate="y")

    """

    columns = ["index", "n", "metric", "estimate"]

    df = data[[truth, estimate, date_var]].set_index(date_var).sort_index()
    if df.empty:
        # Nothing to window over; keep the output schema stable.
        return pd.DataFrame(columns=columns)

    rows = []
    for window in _rolling_df(df=df, td=period):
        if window.empty:
            # A gap in the dates longer than `period` produces a window
            # without observations; there is nothing to score in it.
            continue
        for metric in metric_set:
            rows.append(
                {
                    "index": window.index[0],
                    "n": len(window),
                    "metric": metric.__qualname__,
                    # `truth` holds the observed values and `estimate` the
                    # predictions; asymmetric metrics depend on this order.
                    "estimate": metric(
                        y_true=window[truth], y_pred=window[estimate]
                    ),
                }
            )

    return pd.DataFrame(rows, columns=columns)


def _rolling_df(df: pd.DataFrame, td: timedelta):
first = df.index[0]
last = df.index[-1]

while first < last:
stop = first + td
boolidx = (first <= df.index) & (df.index < stop)
yield df[boolidx].copy()
first = stop


def pin_metrics(board, df_metrics, metrics_pin_name, overwrite=False):
    """
    Placeholder for storing model metrics in a pin

    Not implemented yet: all arguments are accepted but ignored, nothing is
    read from or written to `board`, and the function returns None.
    """
    return None


# """
# Update an existing pin storing model metrics over time

# Parameters
# ----------
# board :
# Pins board
# df_metrics: pd.DataFrame
# Dataframe of metrics over time, such as created by `compute_metrics()`
# metrics_pin_name:
# Pin name for where the metrics are stored
# overwrite: bool
# If True, overwrite any metrics for
# dates that exist both in the existing pin and
# new metrics with the new values. If False (the default),
# raise an error when the new metrics contain overlapping
# dates with the existing pin.
# """
# date_types = (datetime.date, datetime.time, datetime.datetime)
# if not isinstance(df_metrics.index, date_types):
# try:
# df_metrics = df_metrics.index.astype("datetime")
# except TypeError:
# raise TypeError(f"Index of {df_metrics} must be a date type")

# new_metrics = df_metrics.sort_index()

# new_dates = df_metrics.index.unique()

# try:
# old_metrics = board.pin_read(metrics_pin_name)
# except PinsError:
# board.pin_write(metrics_pin_name)

# overlapping_dates = old_metrics.index in new_dates

# if overwrite is True:
# old_metrics = old_metrics not in overlapping_dates
# else:
# if overlapping_dates:
# raise ValueError(
# f"The new metrics overlap with dates \
# already stored in {repr(metrics_pin_name)} \
# Check the aggregated dates or use `overwrite = True`"
# )

# new_metrics = old_metrics + df_metrics
# new_metrics = new_metrics.sort_index()

# pins.pin_write(board, new_metrics, metrics_pin_name)


def plot_metrics(
    df_metrics, date="index", estimate="estimate", metric="metric", n="n", **kw
) -> px.line:
    """
    Draw a line chart of metric values over time, one facet row per metric

    Parameters
    ----------
    df_metrics : DataFrame
        Pandas dataframe of metrics over time, such as created by `compute_metrics()`
    date: str
        Column in `df_metrics` containing dates; used for the x axis
    estimate: str
        Column in `df_metrics` containing metric output; used for the y axis
    metric: str
        Column in `df_metrics` containing metric name; used for both the
        line color and the facet rows
    n: str
        Column in `df_metrics` containing number of observations; forwarded
        as the `markers` argument of `plotly.express.line`
    **kw
        Extra keyword arguments forwarded to `plotly.express.line`

    Returns
    -------
    The figure created by `plotly.express.line`, with the legend hidden
    """

    def _keep_value_only(annotation):
        # Keep only the text after the last "=" of each annotation
        # (e.g. "metric=name" becomes "name").
        annotation.update(text=annotation.text.rpartition("=")[2])

    figure = px.line(
        df_metrics,
        x=date,
        y=estimate,
        color=metric,
        facet_row=metric,
        # NOTE(review): plotly documents `markers` as a boolean flag, so a
        # non-empty column name presumably just switches markers on.
        markers=n,
        **kw,
    )
    figure.for_each_annotation(_keep_value_only)
    figure.update_layout(showlegend=False)

    return figure
1 change: 1 addition & 0 deletions vetiver/tests/snapshots/test_monitor.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"index":{"0":1325376000000,"1":1325376000000,"2":1325376002000,"3":1325376002000,"4":1325376004000,"5":1325376004000,"6":1325376006000,"7":1325376006000,"8":1325376008000,"9":1325376008000},"n":{"0":2,"1":2,"2":2,"3":2,"4":2,"5":2,"6":2,"7":2,"8":2,"9":2},"metric":{"0":"mean_squared_error","1":"mean_absolute_error","2":"mean_squared_error","3":"mean_absolute_error","4":"mean_squared_error","5":"mean_absolute_error","6":"mean_squared_error","7":"mean_absolute_error","8":"mean_squared_error","9":"mean_absolute_error"},"estimate":{"0":0.0,"1":0.0,"2":0.0,"3":0.0,"4":0.0,"5":0.0,"6":0.0,"7":0.0,"8":0.0,"9":0.0}}
36 changes: 36 additions & 0 deletions vetiver/tests/test_monitor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from sklearn import metrics
from datetime import timedelta
import pandas as pd
import numpy
import vetiver

# Shared, read-only fixtures: ten rows, one per second, where the prediction
# `y` always equals the truth `x` (so every error metric is 0).
# The frequency is given as a timedelta rather than an alias string so it
# does not depend on which alias spellings the installed pandas accepts.
rng = pd.date_range("1/1/2012", periods=10, freq=timedelta(seconds=1))
new = dict(x=range(len(rng)), y=range(len(rng)))
# Indexed by date, as `_rolling_df` expects.
df = pd.DataFrame(new, index=rng)
# Same data with the dates in an "index" column, as `compute_metrics` expects.
# Built once here instead of mutating `df` inside a test, so every test can
# run on its own and in any order.
df_dates = df.reset_index()
td = timedelta(seconds=2)
metric_set = [metrics.mean_squared_error, metrics.mean_absolute_error]


def test_rolling():
    # Ten one-second rows split into two-second windows -> five windows of two.
    m = list(vetiver._rolling_df(df, td))
    assert len(m) == 5
    assert len(m[0]) == 2


def test_compute():
    m = vetiver.compute_metrics(
        df_dates, "index", td, metric_set=metric_set, truth="x", estimate="y"
    )
    assert isinstance(m, pd.DataFrame)
    # Five windows times two metrics, four output columns.
    assert m.shape == (10, 4)
    numpy.testing.assert_array_equal(
        m.metric.unique(),
        numpy.array(["mean_squared_error", "mean_absolute_error"], dtype=object),
    )


def test_monitor(snapshot):
    snapshot.snapshot_dir = './vetiver/tests/snapshots'
    m = vetiver.compute_metrics(
        df_dates, "index", td, metric_set=metric_set, truth="x", estimate="y"
    )
    # Smoke check only: plotting must not raise.
    vetiver.plot_metrics(m)
    snapshot.assert_match(m.to_json(), 'test_monitor.json')