Skip to content

Commit

Permalink
[Doc] aggregate showdoc + external reconciliation tutorials' improvements (#214)
Browse files Browse the repository at this point in the history

* Improvements to GluonTS compatibility tutorial

* Tutorials' sidebar order

* Improved tutorials' references and introduction

* Improved utils' documentation, particularly external forecast adapters
  • Loading branch information
kdgutier committed Jun 21, 2023
1 parent 8382b29 commit 1705e6a
Show file tree
Hide file tree
Showing 5 changed files with 1,548 additions and 1,544 deletions.
216 changes: 109 additions & 107 deletions hierarchicalforecast/utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/utils.ipynb.

# %% auto 0
__all__ = ['HierarchicalPlot']
__all__ = ['aggregate', 'HierarchicalPlot']

# %% ../nbs/utils.ipynb 2
# %% ../nbs/utils.ipynb 3
import sys
import timeit
from itertools import chain
Expand All @@ -16,7 +16,7 @@

plt.rcParams['font.family'] = 'serif'

# %% ../nbs/utils.ipynb 4
# %% ../nbs/utils.ipynb 5
class CodeTimer:
def __init__(self, name=None, verbose=True):
self.name = " '" + name + "'" if name else ''
Expand All @@ -31,7 +31,7 @@ def __exit__(self, exc_type, exc_value, traceback):
print('Code block' + self.name + \
' took:\t{0:.5f}'.format(self.took) + ' seconds')

# %% ../nbs/utils.ipynb 5
# %% ../nbs/utils.ipynb 6
def is_strictly_hierarchical(S: np.ndarray,
tags: Dict[str, np.ndarray]):
# main idea:
Expand All @@ -49,7 +49,7 @@ def is_strictly_hierarchical(S: np.ndarray,
nodes = levels_.popitem()[1].size
return paths == nodes

# %% ../nbs/utils.ipynb 6
# %% ../nbs/utils.ipynb 7
def cov2corr(cov, return_std=False):
""" convert covariance matrix to correlation matrix
Expand All @@ -68,105 +68,7 @@ def cov2corr(cov, return_std=False):
else:
return corr

# %% ../nbs/utils.ipynb 7
# convert levels to output quantile names
def level_to_outputs(level:Iterable[int]):
    """ Converts list of levels into output names matching StatsForecast and NeuralForecast methods.
    **Parameters:**<br>
    `level`: int list [0,100]. Probability levels for prediction intervals.<br>
    **Returns:**<br>
    `output_names`: str list. String list with output column names.
    """
    # Each confidence level l yields a symmetric (lower, upper) percentile pair
    # around the median, e.g. l=80 -> percentiles 10 and 90.
    qs = sum([[50-l/2, 50+l/2] for l in level], [])
    output_names = sum([[f'-lo-{l}', f'-hi-{l}'] for l in level], [])

    # Sort percentiles ascending and keep the name suffixes aligned with them.
    sort_idx = np.argsort(qs)
    quantiles = np.array(qs)[sort_idx]

    # Add default median
    quantiles = np.concatenate([np.array([50]), quantiles]) / 100
    output_names = list(np.array(output_names)[sort_idx])
    output_names.insert(0, '-median')

    # Returns quantiles in [0, 1] (median first) and matching column suffixes.
    return quantiles, output_names

# convert quantiles to output quantile names
def quantiles_to_outputs(quantiles:Iterable[float]):
    """Converts list of quantiles into output names matching StatsForecast and NeuralForecast methods.
    **Parameters:**<br>
    `quantiles`: float list [0., 1.]. Alternative to level, quantiles to estimate from y distribution.<br>
    **Returns:**<br>
    `output_names`: str list. String list with output column names.
    """
    output_names = []
    for q in quantiles:
        # Quantiles below the median map to '-lo-' suffixes, above to '-hi-';
        # the suffix carries the implied confidence level rounded to 2 decimals.
        if q<.50:
            output_names.append(f'-lo-{np.round(100-200*q,2)}')
        elif q>.50:
            output_names.append(f'-hi-{np.round(100-200*(1-q),2)}')
        else:
            output_names.append('-median')
    # Returns the input quantiles unchanged, paired with their column suffixes.
    return quantiles, output_names

# %% ../nbs/utils.ipynb 8
# given input array of sample forecasts and input quantiles/levels,
# output a Pandas Dataframe with columns of quantile predictions
def samples_to_quantiles_df(samples:np.ndarray,
                            unique_ids:Iterable[str],
                            dates:Iterable,
                            quantiles:Optional[Iterable[float]] = None,
                            level:Optional[Iterable[int]] = None,
                            model_name:Optional[str] = "model"):
    """ Transform Samples into HierarchicalForecast input.
    Auxiliary function to create compatible HierarchicalForecast input Y_hat_df dataframe.
    **Parameters:**<br>
    `samples`: numpy array. Samples from forecast distribution of shape [n_series, n_samples, horizon].<br>
    `unique_ids`: string list. Unique identifiers for each time series.<br>
    `dates`: datetime list. List of forecast dates.<br>
    `quantiles`: float list in [0., 1.]. Alternative to level, quantiles to estimate from y distribution.<br>
    `level`: int list in [0,100]. Probability levels for prediction intervals.<br>
    `model_name`: string. Name of forecasting model.<br>
    **Returns:**<br>
    `quantiles`: float list in [0., 1.]. quantiles to estimate from y distribution .<br>
    `Y_hat_df`: pd.DataFrame. With base quantile forecasts with columns ds and models to reconcile indexed by unique_id.
    """

    # Get the shape of the array
    n_series, n_samples, horizon = samples.shape

    assert n_series == len(unique_ids)
    assert horizon == len(dates)
    assert (quantiles is not None) ^ (level is not None) #check exactly one of quantiles/levels has been input

    #create initial dictionary
    # Point forecast column: mean over the sample axis, flattened to one
    # row per (series, date) pair.
    forecasts_mean = np.mean(samples, axis=1).flatten()
    unique_ids = np.repeat(unique_ids, horizon)
    ds = np.tile(dates, n_series)
    data = pd.DataFrame({"unique_id":unique_ids, "ds":ds, model_name:forecasts_mean})

    #create quantiles and quantile names
    quantiles, quantile_names = level_to_outputs(level) if level is not None else quantiles_to_outputs(quantiles)
    percentiles = [quantile * 100 for quantile in quantiles]
    col_names = np.array([model_name + quantile_name for quantile_name in quantile_names])

    #add quantiles to dataframe
    forecasts_quantiles = np.percentile(samples, percentiles, axis=1)

    # np.percentile over axis=1 yields [Q,N,H]; reorder so rows align with `data`.
    forecasts_quantiles = np.transpose(forecasts_quantiles, (1,2,0)) # [Q,N,H] -> [N,H,Q]
    forecasts_quantiles = forecasts_quantiles.reshape(-1,len(quantiles))

    df = pd.DataFrame(data=forecasts_quantiles,
                      columns=col_names)

    return quantiles, pd.concat([data,df], axis=1).set_index('unique_id')

# %% ../nbs/utils.ipynb 11
# %% ../nbs/utils.ipynb 9
def _to_summing_matrix(S_df: pd.DataFrame):
"""Transforms the DataFrame `df` of hierarchies to a summing matrix S."""
categories = [S_df[col].unique() for col in S_df.columns]
Expand All @@ -179,7 +81,7 @@ def _to_summing_matrix(S_df: pd.DataFrame):
tags = dict(zip(S_df.columns, categories))
return S, tags

# %% ../nbs/utils.ipynb 12
# %% ../nbs/utils.ipynb 10
def aggregate_before(df: pd.DataFrame,
spec: List[List[str]],
agg_fn: Callable = np.sum):
Expand Down Expand Up @@ -221,7 +123,7 @@ def aggregate_before(df: pd.DataFrame,
S, tags = _to_summing_matrix(S_df.loc[bottom_hier, hiers_cols])
return Y_df, S, tags

# %% ../nbs/utils.ipynb 13
# %% ../nbs/utils.ipynb 11
def numpy_balance(*arrs):
"""
Fast NumPy implementation of balance function.
Expand Down Expand Up @@ -287,12 +189,14 @@ def _to_summing_dataframe(df: pd.DataFrame,
Y_bottom_df.unique_id = Y_bottom_df.unique_id.cat.set_categories(S_df.columns)
return Y_bottom_df, S_df, tags

# %% ../nbs/utils.ipynb 12
def aggregate(df: pd.DataFrame,
spec: List[List[str]],
is_balanced: bool=False):
""" Utils Aggregation Function.
Aggregates bottom level series contained in the pd.DataFrame `df` according
to levels defined in the `spec` list applying the `agg_fn` (sum, mean).
**Parameters:**<br>
`df`: pd.DataFrame with columns `['ds', 'y']` and columns to aggregate.<br>
`spec`: List of levels. Each element of the list contains a list of columns of `df` to aggregate.<br>
Expand Down Expand Up @@ -349,7 +253,7 @@ def aggregate(df: pd.DataFrame,
Y_df = Y_df.set_index('unique_id').dropna()
return Y_df, S_df, tags

# %% ../nbs/utils.ipynb 22
# %% ../nbs/utils.ipynb 19
class HierarchicalPlot:
""" Hierarchical Plot
Expand Down Expand Up @@ -542,3 +446,101 @@ def plot_hierarchical_predictions_gap(self,
plt.legend()
plt.grid()
plt.show()

# %% ../nbs/utils.ipynb 34
# convert levels to output quantile names
def level_to_outputs(level:Iterable[int]):
    """ Converts list of levels into output names matching StatsForecast and NeuralForecast methods.
    **Parameters:**<br>
    `level`: int list [0,100]. Probability levels for prediction intervals.<br>
    **Returns:**<br>
    `output_names`: str list. String list with output column names.
    """
    # Each confidence level l maps to a symmetric (lower, upper) percentile
    # pair around the median, e.g. l=80 -> percentiles 10 and 90.
    raw_percentiles = []
    raw_names = []
    for l in level:
        raw_percentiles += [50 - l / 2, 50 + l / 2]
        raw_names += [f'-lo-{l}', f'-hi-{l}']

    # Sort percentiles ascending, keeping the name suffixes aligned.
    order = np.argsort(raw_percentiles)
    quantiles = np.array(raw_percentiles)[order]

    # Prepend the default median, then rescale percentages into [0, 1].
    quantiles = np.concatenate([np.array([50]), quantiles]) / 100
    output_names = list(np.array(raw_names)[order])
    output_names.insert(0, '-median')

    return quantiles, output_names

# convert quantiles to output quantile names
def quantiles_to_outputs(quantiles:Iterable[float]):
    """Converts list of quantiles into output names matching StatsForecast and NeuralForecast methods.
    **Parameters:**<br>
    `quantiles`: float list [0., 1.]. Alternative to level, quantiles to estimate from y distribution.<br>
    **Returns:**<br>
    `output_names`: str list. String list with output column names.
    """
    def _suffix(q):
        # Below-median quantiles become lower bounds, above-median upper
        # bounds; the suffix carries the implied confidence level.
        if q < .50:
            return f'-lo-{np.round(100-200*q,2)}'
        if q > .50:
            return f'-hi-{np.round(100-200*(1-q),2)}'
        return '-median'

    output_names = [_suffix(q) for q in quantiles]
    return quantiles, output_names

# %% ../nbs/utils.ipynb 35
# given input array of sample forecasts and input quantiles/levels,
# output a Pandas Dataframe with columns of quantile predictions
def samples_to_quantiles_df(samples:np.ndarray,
                            unique_ids:Iterable[str],
                            dates:Iterable,
                            quantiles:Optional[Iterable[float]] = None,
                            level:Optional[Iterable[int]] = None,
                            model_name:Optional[str] = "model"):
    """ Transform Random Samples into HierarchicalForecast input.
    Auxiliary function to create compatible HierarchicalForecast input `Y_hat_df` dataframe.
    **Parameters:**<br>
    `samples`: numpy array. Samples from forecast distribution of shape [n_series, n_samples, horizon].<br>
    `unique_ids`: string list. Unique identifiers for each time series.<br>
    `dates`: datetime list. List of forecast dates.<br>
    `quantiles`: float list in [0., 1.]. Alternative to level, quantiles to estimate from y distribution.<br>
    `level`: int list in [0,100]. Probability levels for prediction intervals.<br>
    `model_name`: string. Name of forecasting model.<br>
    **Returns:**<br>
    `quantiles`: float list in [0., 1.]. quantiles to estimate from y distribution .<br>
    `Y_hat_df`: pd.DataFrame. With base quantile forecasts with columns ds and models to reconcile indexed by unique_id.
    """
    n_series, n_samples, horizon = samples.shape

    # Validate input alignment; exactly one of `quantiles`/`level` is allowed.
    assert n_series == len(unique_ids)
    assert horizon == len(dates)
    assert (quantiles is not None) ^ (level is not None)

    # Point forecast column: mean over the sample axis, one row per
    # (series, date) pair in series-major order.
    mean_forecast = np.mean(samples, axis=1).flatten()
    repeated_ids = np.repeat(unique_ids, horizon)
    ds = np.tile(dates, n_series)
    data = pd.DataFrame({"unique_id": repeated_ids, "ds": ds, model_name: mean_forecast})

    # Resolve levels/quantiles into sorted quantiles plus column suffixes.
    if level is not None:
        quantiles, quantile_names = level_to_outputs(level)
    else:
        quantiles, quantile_names = quantiles_to_outputs(quantiles)
    percentiles = [quantile * 100 for quantile in quantiles]
    col_names = np.array([model_name + suffix for suffix in quantile_names])

    # Empirical quantiles over the sample axis: [Q,N,H] -> [N,H,Q] so that
    # rows line up with `data` after flattening the first two axes.
    forecasts_quantiles = np.percentile(samples, percentiles, axis=1)
    forecasts_quantiles = np.transpose(forecasts_quantiles, (1, 2, 0))
    forecasts_quantiles = forecasts_quantiles.reshape(-1, len(quantiles))

    quantile_df = pd.DataFrame(data=forecasts_quantiles, columns=col_names)

    return quantiles, pd.concat([data, quantile_df], axis=1).set_index('unique_id')
Loading

0 comments on commit 1705e6a

Please sign in to comment.