Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Doc] First iteration of HierE2E baseline execution + Documentation detail improvements #212

Merged
merged 11 commits into from
Jun 22, 2023
710 changes: 710 additions & 0 deletions experiments/hierarchical_baselines/nbs/run_hiere2e.ipynb

Large diffs are not rendered by default.

74 changes: 63 additions & 11 deletions experiments/hierarchical_baselines/src/run_baselines.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from hierarchicalforecast.utils import is_strictly_hierarchical
from hierarchicalforecast.utils import HierarchicalPlot, CodeTimer
from hierarchicalforecast.evaluation import scaled_crps, msse, energy_score
from hierarchicalforecast.evaluation import scaled_crps, rel_mse, msse

from datasetsforecast.hierarchical import HierarchicalData, HierarchicalInfo

Expand All @@ -34,27 +34,40 @@ class HierarchicalDataset(object):
@staticmethod
def _get_hierarchical_scrps(hier_idxs, Y, Yq_hat, q_to_pred):
    """Compute the scaled CRPS for each level of the hierarchy.

    Parameters
    ----------
    hier_idxs : list of index arrays, one per aggregation level
        (obtained from the aggregation tags); each selects the rows
        of ``Y``/``Yq_hat`` belonging to that level.
    Y : array (n_series, horizon), observed test values.
    Yq_hat : array (n_series, horizon, n_quantiles), predicted quantiles.
    q_to_pred : quantile levels matching ``Yq_hat``'s last axis.

    Returns
    -------
    list of floats, one scaled-CRPS score per hierarchy level.
    """
    # We use the indexes obtained from the aggregation tags
    # to compute scaled CRPS across the hierarchy levels
    scrps_list = []
    for idxs in hier_idxs:
        y = Y[idxs, :]
        yq_hat = Yq_hat[idxs, :, :]
        level_scrps = scaled_crps(y, yq_hat, q_to_pred)
        scrps_list.append(level_scrps)
    return scrps_list

@staticmethod
def _get_hierarchical_msse(hier_idxs, Y, Y_hat, Y_train):
    """Compute the mean scaled squared error (MSSE) per hierarchy level.

    Parameters
    ----------
    hier_idxs : list of index arrays, one per aggregation level
        (obtained from the aggregation tags); each selects the rows
        of ``Y``/``Y_hat``/``Y_train`` belonging to that level.
    Y : array (n_series, horizon), observed test values.
    Y_hat : array (n_series, horizon), point forecasts.
    Y_train : array (n_series, n_train), in-sample values used to
        scale the squared errors.

    Returns
    -------
    list of floats, one MSSE score per hierarchy level.
    """
    # We use the indexes obtained from the aggregation tags
    # to compute MS scaled Error across the hierarchy levels
    msse_list = []
    for idxs in hier_idxs:
        y = Y[idxs, :]
        y_hat = Y_hat[idxs, :]
        y_train = Y_train[idxs, :]
        level_msse = msse(y, y_hat, y_train)
        msse_list.append(level_msse)
    return msse_list

@staticmethod
def _get_hierarchical_rel_mse(hier_idxs, Y, Y_hat, Y_train):
    """Compute the relative MSE for every level of the hierarchy.

    Each entry of ``hier_idxs`` (derived from the aggregation tags)
    indexes the rows of ``Y``/``Y_hat``/``Y_train`` that belong to one
    aggregation level; ``rel_mse`` is evaluated on that slice.

    Returns a list with one relative-MSE score per hierarchy level.
    """
    return [
        rel_mse(Y[idxs, :], Y_hat[idxs, :], Y_train[idxs, :])
        for idxs in hier_idxs
    ]

@staticmethod
def _sort_hier_df(Y_df, S_df):
Expand Down Expand Up @@ -212,6 +225,8 @@ def run_baselines(dataset, intervals_method, verbose=False, seed=0):
with CodeTimer('Evaluate Base Forecasts ', verbose):
crps_results = {'Dataset': [dataset] * len(['Overall'] + list(tags.keys())),
'Level': ['Overall'] + list(tags.keys()),}
relmse_results = {'Dataset': [dataset] * len(['Overall'] + list(tags.keys())),
'Level': ['Overall'] + list(tags.keys()),}
msse_results = {'Dataset': [dataset] * len(['Overall'] + list(tags.keys())),
'Level': ['Overall'] + list(tags.keys()),}
Y_hat_quantiles = Y_hat_df.drop(columns=['ds', 'AutoARIMA'])
Expand All @@ -223,6 +238,11 @@ def run_baselines(dataset, intervals_method, verbose=False, seed=0):
Yq_hat=y_hat_quantiles_np,
q_to_pred=QUANTILES,
hier_idxs=data['hier_idxs'])
relmse_results['AutoARIMA'] = HierarchicalDataset._get_hierarchical_rel_mse(
Y=y_test,
Y_hat=y_hat_np,
Y_train=y_train,
hier_idxs=data['hier_idxs'])
msse_results['AutoARIMA'] = HierarchicalDataset._get_hierarchical_msse(
Y=y_test,
Y_hat=y_hat_np,
Expand Down Expand Up @@ -251,6 +271,28 @@ def run_baselines(dataset, intervals_method, verbose=False, seed=0):

crps_results = pd.DataFrame(crps_results)

with CodeTimer('Evaluate Models relMSE', verbose):
for model in hrec.level_names.keys():
relmse_results[model] = []
for level in relmse_results['Level']:
if level=='Overall':
row_idxs = np.arange(len(S_df))
else:
row_idxs = S_df.index.get_indexer(tags[level])
col_idx = model_columns.get_loc(model)
_y = y_test[row_idxs,:]
_y_train = y_train[row_idxs,:]
_y_hat_seeds = y_rec[:,row_idxs,:,:][:,:,:,col_idx]

level_model_relmse = []
for seed_idx in range(y_rec.shape[0]):
_y_hat = _y_hat_seeds[seed_idx,:,:]
level_model_relmse.append(rel_mse(y=_y, y_hat=_y_hat, y_train=_y_train))
level_model_relmse = f'{np.mean(level_model_relmse):.4f}'
relmse_results[model].append(level_model_relmse)

relmse_results = pd.DataFrame(relmse_results)

with CodeTimer('Evaluate Models MSSE ', verbose):
for model in hrec.level_names.keys():
msse_results[model] = []
Expand All @@ -273,7 +315,7 @@ def run_baselines(dataset, intervals_method, verbose=False, seed=0):

msse_results = pd.DataFrame(msse_results)

return crps_results, msse_results
return crps_results, relmse_results, msse_results

if __name__ == '__main__':

Expand Down Expand Up @@ -305,25 +347,35 @@ def run_baselines(dataset, intervals_method, verbose=False, seed=0):
# Run experiments
crps_results_list = []
msse_results_list = []
try:
crps_results, msse_results = run_baselines(dataset=dataset,
relmse_results_list = []

try: # Hacky protection for non strictly hierarchical datasets
crps_results, relmse_results, msse_results = run_baselines(dataset=dataset,
intervals_method=intervals_method, verbose=verbose)
crps_results_list.append(crps_results)
msse_results_list.append(msse_results)
relmse_results_list.append(relmse_results)
except Exception as e:
print('failed ', dataset)
print(str(e))
print('\n\n')

crps_results_df = pd.concat(crps_results_list)
msse_results_df = pd.concat(msse_results_list)
relmse_results_df = pd.concat(relmse_results_list)

crps_results_df.to_csv(f'./data/{intervals_method}_crps.csv', index=False)
msse_results_df.to_csv(f'./data/{intervals_method}_msse.csv', index=False)
relmse_results_df.to_csv(f'./data/{intervals_method}_relmse.csv', index=False)

print('='*(200+24))
print(f'{intervals_method} sCRPS:')
print(crps_results_df)

print('\n\n'+'='*(200+24))
print(f'{intervals_method} relMSE:')
print(relmse_results_df)

print('\n\n'+'='*(200+24))
print(f'{intervals_method} MSSE:')
print(msse_results_df)
2 changes: 1 addition & 1 deletion nbs/_quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ website:
collapse-below: lg
left:
- text: "Get Started"
href: examples/Getting_Started_short.ipynb
href: examples/TourismSmall.ipynb
- text: "NixtlaVerse"
menu:
- text: "MLForecast 🤖"
Expand Down
2 changes: 1 addition & 1 deletion nbs/examples/HierarchicalForecast-GluonTS.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"\n",
"The notebook utilizes the GluonTS DeepAREstimator to create base forecasts for the TourismLarge Hierarchical Dataset. We make the base forecasts compatible with HierarchicalForecast's reconciliation functions via the `samples_to_quantiles_df` utility function that transforms GluonTS' output forecasts into a compatible data frame format. After that, we use HierarchicalForecast to reconcile the base predictions.\n",
"\n",
"**References**<br>\n",
"**References**<br> \n",
"- [David Salinas, Valentin Flunkert, Jan Gasthaus, Tim Januschowski (2020). \"DeepAR: Probabilistic forecasting with autoregressive recurrent networks\". International Journal of Forecasting.](https://www.sciencedirect.com/science/article/pii/S0169207019301888)<br>\n",
"- [Alexander Alexandrov et. al (2020). \"GluonTS: Probabilistic and Neural Time Series Modeling in Python\". Journal of Machine Learning Research.](https://www.jmlr.org/papers/v21/19-820.html)<br>\n",
"\n",
Expand Down
20 changes: 20 additions & 0 deletions nbs/examples/MLFrameworksExample.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1741,6 +1741,26 @@
"With minimal parsing, we can reconcile the raw output predictions with different HierarchicalForecast reconciliation methods."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
":::{.callout-warning collapse=\"false\"}\n",
"#### Reconciliation Methods Availability\n",
"\n",
"The following reconciliation methods require access to insample predictions:<br>\n",
"- `ERM(method='closed'), ERM(method='reg_bu')`<br>\n",
"- `TopDown(method='average_proportions'), TopDown(method='proportion_averages')`<br>\n",
"- `MiddleOut(top_down_method='average_proportions'), MiddleOut(top_down_method='proportion_averages')`<br>\n",
"- `MinTrace(method='wls_var'), MinTrace(method='mint_cov'), MinTrace(method='mint_shrink')`<br>\n",
"\n",
"You can obtain NeuralForecast's insample predictions via the [`NeuralForecast.predict_insample`](https://nixtla.github.io/neuralforecast/core.html#neuralforecast.predict_insample) method.\n",
"\n",
"We are working on making MLForecast's insample predictions available.\n",
":::"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down
Binary file added nbs/favicon_png.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.