
Commit d5e7aff

AA for all ts built
1 parent eef9637 commit d5e7aff

File tree

5 files changed (+2830 lines, -515 lines)


DFA_forecast.ipynb

Lines changed: 2650 additions & 481 deletions
Large diffs are not rendered by default.

DFA_func.ipynb

Lines changed: 180 additions & 34 deletions
@@ -2,7 +2,7 @@
 "cells": [
 {
 "cell_type": "code",
-"execution_count": 1,
+"execution_count": 24,
 "id": "ba5efd61",
 "metadata": {},
 "outputs": [],
@@ -14,10 +14,12 @@
 "from statsmodels.tsa.forecasting.theta import ThetaModel\n",
 "from scipy.optimize import fsolve\n",
 "from scipy.special import rel_entr, kl_div\n",
+"from sklearn.model_selection import ParameterGrid\n",
 "from tqdm.notebook import tqdm\n",
 "\n",
 "import matplotlib.pyplot as plt\n",
-"plt.style.use('fivethirtyeight')\n",
+"# plt.style.use('fivethirtyeight')\n",
+"# plt.style.use('seaborn')\n",
 "\n",
 "import warnings\n",
 "warnings.filterwarnings('ignore')"
@@ -30,7 +32,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"def get_density_forecast(ts, horizon, base_alg, base_params={}, fit_params={},\n",
+"def get_density_forecast(ts, horizon, base_alg, base_params={}, fit_params={}, exog=None,\n",
 " bins='auto', omega=None, fittedvalues=False):\n",
 " \"\"\"\n",
 " Returns a list of density dictionaries {'bins': np.array, 'probs': np.array, 'dotted_forecast': float}.\n",
@@ -53,8 +55,11 @@
 " \n",
 " if omega is not None:\n",
 " bins = omega\n",
-" \n",
-" alg = base_alg(ts, **base_params).fit(**fit_params)\n",
+" \n",
+" if base_alg == ARIMA:\n",
+" alg = base_alg(ts, exog, **base_params).fit(**fit_params)\n",
+" else:\n",
+" alg = base_alg(ts, **base_params).fit(**fit_params)\n",
 " \n",
 " if fittedvalues:\n",
 " alg_preds = alg.predict(start=0, end=len(ts) + horizon - 1)\n",
@@ -286,7 +291,7 @@
 "outputs": [],
 "source": [
 "def aggregating_algorithm(ts, horizon, base_alg_list, bins=10, omega_mode=\"basic\",\n",
-" loss_function=brier_loss, m=2, p=2, weights=None, eta=1):\n",
+" loss_function=brier_loss, exog=None, m=2, p=2, weights=None, eta=1):\n",
 " \"\"\"\n",
 " Returns density dictionary {'bins': np.array, 'probs': np.array, 'dotted_forecast': float}.\n",
 " \n",
@@ -318,7 +323,7 @@
 " i = 0\n",
 " for base_alg, base_alg_params, fit_alg_params in base_alg_list:\n",
 " BA_preds[i] = get_density_forecast(ts, horizon, base_alg, base_params=base_alg_params,\n",
-" fit_params=fit_alg_params, omega=omega, fittedvalues=True)\n",
+" fit_params=fit_alg_params, omega=omega, exog=exog, fittedvalues=True)\n",
 " i += 1\n",
 " \n",
 " \n",
@@ -360,14 +365,14 @@
 "outputs": [],
 "source": [
 "def get_optim_m(ts, base_alg_list, omega_mode=\"basic\",\n",
-" bins=10, p=2):\n",
+" exog=None, bins=10, p=2):\n",
 " best_m = 0\n",
 " best_loss = 1000\n",
 " AA_losses = {}\n",
 " # (np.linspace(0.1, 10, 100),\n",
 " for m in tqdm(np.linspace(0.5, 10, 20), leave=False):\n",
 " AA_preds = aggregating_algorithm(ts, 0, base_alg_list,\n",
-" bins=bins, omega_mode=omega_mode, m=m, p=p)\n",
+" bins=bins, omega_mode=omega_mode, exog=exog, m=m, p=p)\n",
 " AA_losses[m] = []\n",
 " for i in range(len(ts.values)):\n",
 " AA_losses[m].append((brier_loss(ts.values[i], AA_preds[i])))\n",
@@ -378,27 +383,105 @@
 " best_loss = loss\n",
 " \n",
 " n_bins = AA_preds[0]['probs'].size\n",
-" return best_m, best_loss, n_bins"
+" return best_m, best_loss, n_bins, AA_losses"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": 14,
+"id": "76091910",
+"metadata": {},
+"outputs": [],
+"source": [
+"def plot_optim_m(all_losses_dict, best_m, ax=plt):\n",
+" all_losses = [(key, np.mean(values)) for key, values in all_losses_dict.items()]\n",
+" m_list, loss_list = zip(*all_losses)\n",
+"\n",
+" ax.plot(m_list, loss_list)\n",
+" \n",
+" x, y = best_m, np.mean(all_losses_dict[best_m])\n",
+" ax.scatter(x, y, s=70, c=\"orange\", zorder=3)\n",
+" ax.text(x - 0.4, y + 0.02, '({}, {})'.format(x, round(y, 2)));"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 77,
+"id": "4fd425d8",
+"metadata": {},
+"outputs": [],
+"source": [
+"def get_opt_arima(endog, exog, grid_params):\n",
+" opt_model = {'opt_params': {'order': (0, 0, 0), 'seasonal_order': (0, 0, 0, 0)},\n",
+" 'opt_bic': 10**6, 'opt_model': None}\n",
+" \n",
+" # 'opt_params': {'p': 0, 'd':0, 'q': 0, 'P': 0, 'D':0, 'Q': 0, 's': 0}\n",
+"\n",
+" grid = ParameterGrid(grid_params)\n",
+" for params in tqdm(grid, leave=False):\n",
+" try:\n",
+" arima = ARIMA(endog,\n",
+" exog,\n",
+" order=(params['p'], params['d'], params['q']),\n",
+" seasonal_order=(params['P'], params['D'], params['Q'], params['s'])).fit()\n",
+" except:\n",
+" print(\"LU decomposition error\")\n",
+" continue\n",
+"\n",
+" if arima.bic < opt_model['opt_bic']:\n",
+" opt_model['opt_params']['order'] = (params['p'], params['d'], params['q'])\n",
+" opt_model['opt_params']['seasonal_order'] = (params['P'], params['D'], params['Q'], params['s'])\n",
+" opt_model['opt_bic'] = arima.bic\n",
+" opt_model['opt_model'] = arima\n",
+" \n",
+" return opt_model"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "b9d06247",
+"metadata": {},
+"outputs": [],
+"source": [
+"def get_auto_base_alg_list(endog, exog, grid_params):\n",
+" auto_base_alg_list = []\n",
+" \n",
+" opt_arima = get_opt_arima(endog, exog, grid_params)\n",
+" auto_base_alg_list.append((ARIMA, opt_arima['opt_params'], {}))\n",
+" \n",
+" SES = ExponentialSmoothing(endog).fit(optimized=True)\n",
+" auto_base_alg_list.append((ExponentialSmoothing, {},\n",
+" {'smoothing_level': SES.params['smoothing_level']}))\n",
+" \n",
+" ES_add7 = ExponentialSmoothing(endog, seasonal=\"add\",\n",
+" seasonal_periods=7).fit(optimized=True)\n",
+" auto_base_alg_list.append((ExponentialSmoothing, {\"seasonal\": \"add\", \"seasonal_periods\": 7},\n",
+" {'smoothing_level': ES_add7.params['smoothing_level'],\n",
+" 'smoothing_seasonal': ES_add7.params['smoothing_seasonal']}))\n",
+" \n",
+" return auto_base_alg_list"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 136,
 "id": "813a429f",
 "metadata": {},
 "outputs": [],
 "source": [
-"def plot_losses(ts, horizon, base_alg_list, omega_mode, best_m, ax, bins=10, title='', legend=[]):\n",
+"def plot_losses(ts, horizon, base_alg_list, omega_mode, best_m, ax, bins=10, exog=None,\n",
+" title='', legend=[], number_to_plot=None):\n",
 " omega = get_omega(ts, mode=omega_mode, bins=bins)\n",
 " \n",
 " pred_dict = {}\n",
 " i = 1\n",
 " for base_alg, base_params, fit_params in base_alg_list:\n",
 " pred_dict[f\"alg{i}\"] = get_density_forecast(ts, horizon, base_alg, base_params, fit_params,\n",
-" bins=bins, omega=omega, fittedvalues=True)\n",
+" bins=bins, omega=omega, exog=exog, fittedvalues=True)\n",
 " i += 1\n",
 " \n",
-" pred_dict[\"AA\"] = aggregating_algorithm(ts, horizon, base_alg_list,\n",
+" pred_dict[\"AA\"] = aggregating_algorithm(ts, horizon, base_alg_list, exog=exog,\n",
 " bins=bins, omega_mode=omega_mode, m=best_m)\n",
 "\n",
 " losses_dict = {key : [] for key in pred_dict.keys()}\n",
@@ -418,59 +501,104 @@
 " print(\"Theoretical bound met\")\n",
 " else:\n",
 " print(f\"Theoretical bound not met: {tb_errors / ts.values.size}% violations\")\n",
+" \n",
+" losses_dict_sorted = {k: v for k, v in sorted(losses_dict.items(), key=lambda item: item[1].mean())}\n",
 " \n",
-" for alg in losses_dict.keys():\n",
-" ax.plot(ts.index, losses_dict[alg])\n",
+" if not number_to_plot:\n",
+" number_to_plot = len(base_alg_list)\n",
+" \n",
+" real_legend = []\n",
+" i = 0\n",
+" for alg in losses_dict_sorted.keys():\n",
+" if i < number_to_plot:\n",
+" if alg == \"AA\":\n",
+" continue\n",
+" ax.plot(ts.index, losses_dict_sorted[alg])\n",
+" i += 1\n",
+" real_legend.append(legend[int(alg[-1]) - 1])\n",
+" print(f\"{legend[int(alg[-1]) - 1]} mean loss: {losses_dict_sorted[alg].mean()}\")\n",
+" else:\n",
+" break\n",
+" \n",
+" ax.plot(ts.index, losses_dict_sorted['AA'])\n",
+" print(f\"AA mean loss: {losses_dict_sorted['AA'].mean()}\")\n",
+" real_legend.append('AA')\n",
 " ax.plot(ts.index, theoretical_bound)\n",
+" real_legend.append('TB')\n",
 " \n",
 " ax.set_title(title)\n",
-" ax.legend(legend, loc='lower right');"
+" ax.legend(real_legend, loc='upper right');\n",
+" return losses_dict_sorted"
 ]
 },
 {
 "cell_type": "code",
-"execution_count": 25,
+"execution_count": 170,
 "id": "c8c3f323",
 "metadata": {},
 "outputs": [],
 "source": [
-"def get_all_predictions(ts_dict, horizon, base_alg_list, omega_mode, best_m, bins=10):\n",
+"def get_all_predictions(train_dict, test_dict, horizon, base_alg_list, grid_params,\n",
+" omega_mode=\"basic\", best_m=\"optim\", is_exog=False, bins=10):\n",
 " pred_dict = {}\n",
 " losses_dict = {}\n",
 " \n",
-" for ts_name, ts in tqdm(ts_dict.items()):\n",
-" omega = get_omega(ts, mode=omega_mode, bins=bins)\n",
-"\n",
+" for ts_name in tqdm(train_dict.keys()):\n",
+" if is_exog:\n",
+" ts_train = train_dict[ts_name][\"ts\"]\n",
+" ts_test = test_dict[ts_name][\"ts\"]\n",
+" exog_train = (pd.DataFrame(data=[train_dict[ts_name][\"actual_price\"],\n",
+" train_dict[ts_name][\"promo\"]])\n",
+" .transpose().astype({\"Actual_Price\": float, \"Promo\": int}))\n",
+" exog_test = (pd.DataFrame(data=[test_dict[ts_name][\"actual_price\"],\n",
+" test_dict[ts_name][\"promo\"]])\n",
+" .transpose().astype({\"Actual_Price\": float, \"Promo\": int}))\n",
+" else:\n",
+" ts_train = train_dict[ts_name]\n",
+" ts_test = test_dict[ts_name]\n",
+" exog_train = None\n",
+" exog_test = None\n",
+" \n",
+" auto_base_alg_list = get_auto_base_alg_list(ts_train, exog_train, grid_params)\n",
+" all_base_alg_list = base_alg_list + auto_base_alg_list\n",
+" \n",
+" if best_m == \"optim\":\n",
+" best_m, _, _, _ = get_optim_m(ts_train, all_base_alg_list, exog=exog_train,\n",
+" bins=bins, omega_mode=omega_mode)\n",
+" \n",
+" omega = get_omega(ts_test, mode=omega_mode, bins=bins)\n",
+" \n",
 " pred_dict[ts_name] = {}\n",
 " i = 1\n",
-" for base_alg, base_params, fit_params in base_alg_list:\n",
-" pred_dict[ts_name][f\"alg{i}\"] = get_density_forecast(ts, horizon, base_alg, base_params, fit_params,\n",
-" bins=bins, omega=omega, fittedvalues=True)\n",
+" for base_alg, base_params, fit_params in all_base_alg_list:\n",
+" pred_dict[ts_name][f\"alg{i}\"] = get_density_forecast(ts_test, horizon, base_alg, base_params, fit_params,\n",
+" exog=exog_test, bins=bins,\n",
+" omega=omega, fittedvalues=True)\n",
 " i += 1\n",
 "\n",
-" pred_dict[ts_name][\"AA\"] = aggregating_algorithm(ts, horizon, base_alg_list,\n",
+" pred_dict[ts_name][\"AA\"] = aggregating_algorithm(ts_test, horizon, all_base_alg_list, exog=exog_test,\n",
 " bins=bins, omega_mode=omega_mode, m=best_m)\n",
 "\n",
 " losses_dict[ts_name] = {key : [] for key in pred_dict[ts_name].keys()}\n",
 "\n",
-" for i in range(len(ts.values)):\n",
+" for i in range(len(ts_test.values)):\n",
 " for alg in losses_dict[ts_name].keys():\n",
-" losses_dict[ts_name][alg].append(brier_loss(ts.values[i], pred_dict[ts_name][alg][i]))\n",
+" losses_dict[ts_name][alg].append(brier_loss(ts_test.values[i], pred_dict[ts_name][alg][i]))\n",
 "\n",
 " for alg in losses_dict[ts_name].keys():\n",
-" losses_dict[ts_name][alg] = np.cumsum(losses_dict[ts_name][alg]) / list(range(1, len(ts.values) + 1))\n",
+" losses_dict[ts_name][alg] = np.cumsum(losses_dict[ts_name][alg]) / list(range(1, len(ts_test.values) + 1))\n",
 " \n",
 " return pred_dict, losses_dict"
 ]
 },
 {
 "cell_type": "code",
-"execution_count": 96,
+"execution_count": 102,
 "id": "27f60e77",
 "metadata": {},
 "outputs": [],
 "source": [
-"def plot_total_losses(losses_dict, ax, title, legend):\n",
+"def plot_total_losses(losses_dict, ax, title, legend, number_to_plot=None):\n",
 " total_losses = {alg: np.zeros_like(losses) for alg, losses in losses_dict[next(iter(losses_dict))].items()}\n",
 " \n",
 " for ts_losses in losses_dict.values():\n",
@@ -480,12 +608,30 @@
 " for alg, losses in total_losses.items():\n",
 " total_losses[alg] = losses / len(losses_dict)\n",
 " \n",
-" for alg, losses in total_losses.items():\n",
-" ax.plot(losses)\n",
-" print(f\"{alg}: {np.mean(losses)}\")\n",
+" total_losses_sorted = {k: v for k, v in sorted(total_losses.items(), key=lambda item: item[1].mean())}\n",
+" \n",
+" if not number_to_plot:\n",
+" number_to_plot = len(legend) - 2\n",
+" \n",
+" real_legend = []\n",
+" i = 0\n",
+" for alg in total_losses_sorted.keys():\n",
+" if i < number_to_plot:\n",
+" if alg == \"AA\":\n",
+" continue\n",
+" ax.plot(total_losses_sorted[alg])\n",
+" i += 1\n",
+" real_legend.append(legend[int(alg[-1]) - 1])\n",
+" print(f\"{legend[int(alg[-1]) - 1]} mean loss: {total_losses_sorted[alg].mean()}\")\n",
+" else:\n",
+" break\n",
+" \n",
+" ax.plot(total_losses_sorted['AA'])\n",
+" print(f\"AA mean loss: {total_losses_sorted['AA'].mean()}\")\n",
+" real_legend.append('AA')\n",
 " \n",
 " ax.set_title(title)\n",
-" ax.legend(legend, loc='lower right');"
+" ax.legend(real_legend, loc='lower right');"
 ]
 }
 ],
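Taken together, the updated DFA_func.ipynb cells suggest the following driver flow: hand-picked learners (possibly none) are extended per series by get_auto_base_alg_list, m is tuned on the training split, and the averaged cumulative Brier losses are plotted with plot_total_losses. A hedged end-to-end sketch under those assumptions; every concrete value, dictionary and legend label below is illustrative, not taken from the diff:

import matplotlib.pyplot as plt

# Illustrative SARIMA search space.
grid_params = {'p': [0, 1, 2], 'd': [0, 1], 'q': [0, 1, 2],
               'P': [0, 1], 'D': [0, 1], 'Q': [0, 1], 's': [7]}

# Rely entirely on the automatically constructed learners for this sketch.
pred_dict, losses_dict = get_all_predictions(train_dict, test_dict, horizon=0,
                                             base_alg_list=[], grid_params=grid_params,
                                             best_m="optim", is_exog=True, bins=10)

fig, ax = plt.subplots(figsize=(12, 6))
plot_total_losses(losses_dict, ax, title="Mean cumulative Brier loss",
                  legend=["auto-ARIMA", "SES", "Holt-Winters add-7", "AA", "TB"])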

data/losses_dict.pickle

1.47 MB
Binary file not shown.

data/pred_dict.pickle

24.4 MB
Binary file not shown.
File renamed without changes.

0 commit comments
