Skip to content

Commit 5e2dd75

Browse files
committed
Update plots and tutorial
1 parent e270e1a commit 5e2dd75

File tree

3 files changed

+175
-116
lines changed

3 files changed

+175
-116
lines changed

climada/util/calibrate/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
from .bayesian_optimizer import (
2323
BayesianOptimizer,
2424
BayesianOptimizerController,
25+
BayesianOptimizerOutput,
2526
BayesianOptimizerOutputEvaluator,
27+
select_best
2628
)
2729
from .scipy_optimizer import ScipyMinimizeOptimizer

climada/util/calibrate/bayesian_optimizer.py

Lines changed: 117 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,11 @@
2828

2929
import pandas as pd
3030
import numpy as np
31+
import matplotlib as mpl
3132
import matplotlib.pyplot as plt
3233
import matplotlib.axes as maxes
34+
import matplotlib.patches as mpatches
35+
import matplotlib.ticker as mticker
3336
from bayes_opt import BayesianOptimization, Events, UtilityFunction, ScreenLogger
3437
from bayes_opt.target_space import TargetSpace
3538

@@ -57,8 +60,46 @@ def allowed(self, values):
5760
return self.results
5861

5962

60-
# TODO: Add read/write method
61-
# TODO: Export this class
63+
def select_best(
64+
p_space_df: pd.DataFrame,
65+
cost_limit: float,
66+
absolute: bool = True,
67+
cost_col=("Calibration", "Cost Function"),
68+
) -> pd.DataFrame:
69+
"""Select the best parameter space samples defined by a cost function limit
70+
71+
The limit is a factor of the minimum value relative to itself (``absolute=True``) or
72+
to the range of cost function values (``absolute=False``). A ``cost_limit`` of 0.1
73+
will select all rows where the cost function is within
74+
75+
- 110% of the minimum value if ``absolute=True``.
76+
- 10% of the range between minimum and maximum cost function value if
77+
``absolute=False``.
78+
79+
Parameters
80+
----------
81+
p_space_df : pd.DataFrame
82+
The parameter space to select from.
83+
cost_limit : float
84+
The limit factor used for selection.
85+
absolute : bool, optional
86+
Whether the limit factor is applied to the minimum value (``True``) or the range
87+
of values (``False``). Defaults to ``True``.
88+
cost_col : Column specifier, optional
89+
The column indicating cost function values. Defaults to
90+
``("Calibration", "Cost Function")``.
91+
92+
Returns
93+
-------
94+
pd.DataFrame
95+
A subselection of the input data frame.
96+
"""
97+
min_val = p_space_df[cost_col].min()
98+
cost_range = min_val if absolute else p_space_df[cost_col].max() - min_val
99+
max_val = min_val + cost_range * cost_limit
100+
return p_space_df.loc[p_space_df[cost_col] <= max_val]
101+
102+
62103
@dataclass
63104
class BayesianOptimizerOutput(Output):
64105
"""Output of a calibration with :py:class:`BayesianOptimizer`
@@ -374,7 +415,10 @@ def _previous_max(self):
374415
return self._improvements[-1].target
375416

376417
def optimizer_params(self) -> dict[str, Union[int, float, str, UtilityFunction]]:
377-
"""Return parameters for the optimizer"""
418+
"""Return parameters for the optimizer
419+
420+
In the current implementation, these do not change.
421+
"""
378422
return {
379423
"init_points": self.init_points,
380424
"n_iter": self.n_iter,
@@ -385,7 +429,7 @@ def optimizer_params(self) -> dict[str, Union[int, float, str, UtilityFunction]]
385429
),
386430
}
387431

388-
def _is_random_step(self):
432+
def _is_random_step(self) -> bool:
389433
"""Return true if we sample randomly instead of Bayesian"""
390434
return (self._last_it_end + self.steps) < self.init_points
391435

@@ -491,7 +535,7 @@ def update(self, event: str, instance: BayesianOptimization):
491535
self._last_it_end = self.steps
492536

493537
def improvements(self) -> pd.DataFrame:
494-
"""Return improvements as nice data
538+
"""Return improvements as nicely formatted data
495539
496540
Returns
497541
-------
@@ -662,58 +706,60 @@ def __post_init__(self):
662706

663707
def plot_impf_variability(
664708
self,
665-
cost_func_diff: float = 0.1,
666709
p_space_df: Optional[pd.DataFrame] = None,
667710
plot_haz: bool = True,
711+
plot_opt_kws: Optional[dict] = None,
668712
plot_impf_kws: Optional[dict] = None,
669713
plot_hist_kws: Optional[dict] = None,
714+
plot_axv_kws: Optional[dict] = None,
670715
):
671716
"""Plot impact function variability with parameter combinations of
672717
almost equal cost function values
673718
674719
Args:
675-
cost_func_diff (float, optional): Max deviation from optimal cost
676-
function value (as fraction). Defaults to 0.1 (i.e. 10%).
677-
p_space_df (pd.DataFrame, optional): parameter space. Defaults to None.
720+
p_space_df (pd.DataFrame, optional): Parameter space to plot functions from.
721+
If ``None``, this uses the space returned by
722+
:py:meth:`~BayesianOptimizerOutput.p_space_to_dataframe`. Use
723+
:py:func:`select_best` for a convenient subselection of parameters close
724+
to the optimum.
678725
plot_haz (bool, optional): Whether or not to plot hazard intensity
679726
distibution. Defaults to False.
680-
plot_impf_kws (dict, optional): Keyword arguments for impact
727+
plot_opt_kws (dict, optional): Keyword arguments for optimal impact
681728
function plot. Defaults to None.
729+
plot_impf_kws (dict, optional): Keyword arguments for all impact
730+
function plots. Defaults to None.
682731
plot_hist_kws (dict, optional): Keyword arguments for hazard
683-
intensity distribution plot. Defaults to None.
732+
intensity histogram plot. Defaults to None.
733+
plot_axv_kws (dict, optional): Keyword arguments for hazard intensity range
734+
plot (axvspan).
684735
"""
685736

686737
# Initialize plot keyword arguments
738+
if plot_opt_kws is None:
739+
plot_opt_kws = {}
687740
if plot_impf_kws is None:
688741
plot_impf_kws = {}
689742
if plot_hist_kws is None:
690743
plot_hist_kws = {}
744+
if plot_axv_kws is None:
745+
plot_axv_kws = {}
691746

692747
# Retrieve hazard type and parameter space
693748
haz_type = self.input.hazard.haz_type
694749
if p_space_df is None:
695750
p_space_df = self.output.p_space_to_dataframe()
696751

697-
# Retrieve parameters of impact functions with cost function values
698-
# within 'cost_func_diff' % of the best estimate
699-
params_within_range = p_space_df["Parameters"]
700-
plot_space_label = "Parameter space"
701-
if cost_func_diff is not None:
702-
max_cost_func_val = p_space_df["Calibration", "Cost Function"].min() * (
703-
1 + cost_func_diff
704-
)
705-
params_within_range = params_within_range.loc[
706-
p_space_df["Calibration", "Cost Function"] <= max_cost_func_val
707-
]
708-
plot_space_label = (
709-
f"within {int(cost_func_diff*100)} percent " f"of best fit"
710-
)
711-
712752
# Set plot defaults
713-
color = plot_impf_kws.pop("color", "tab:blue")
714-
lw = plot_impf_kws.pop("lw", 2)
715-
zorder = plot_impf_kws.pop("zorder", 3)
716-
label = plot_impf_kws.pop("label", "best fit")
753+
colors = mpl.colormaps["tab20"].colors
754+
lw = plot_opt_kws.pop("lw", 2)
755+
label_opt = plot_opt_kws.pop("label", "Optimal Function")
756+
color_opt = plot_opt_kws.pop("color", colors[0])
757+
zorder_opt = plot_opt_kws.pop("zorder", 4)
758+
759+
label_impf = plot_impf_kws.pop("label", "All Functions")
760+
color_impf = plot_impf_kws.pop("color", colors[1])
761+
alpha_impf = plot_impf_kws.pop("alpha", 0.5)
762+
zorder_impf = plot_impf_kws.pop("zorder", 3)
717763

718764
# get number of impact functions and create a plot for each
719765
n_impf = len(self.impf_set.get_func(haz_type=haz_type))
@@ -727,63 +773,76 @@ def plot_impf_variability(
727773
ax.plot(
728774
best_impf.intensity,
729775
best_impf.mdd * best_impf.paa * 100,
730-
color=color,
776+
color=color_opt,
731777
lw=lw,
732-
zorder=zorder,
733-
label=label,
734-
**plot_impf_kws,
778+
zorder=zorder_opt,
779+
label=label_opt,
780+
**plot_opt_kws,
735781
)
736782

737783
# Plot all impact functions within 'cost_func_diff' % of best estimate
738-
for row in range(params_within_range.shape[0]):
739-
label_temp = plot_space_label if row == 0 else None
784+
for idx, (_, row) in enumerate(p_space_df.iterrows()):
785+
label_temp = label_impf if idx == 0 else None
740786

741-
sel_params = params_within_range.iloc[row, :].to_dict()
742-
temp_impf_set = self.input.impact_func_creator(**sel_params)
787+
temp_impf_set = self.input.impact_func_creator(**row["Parameters"])
743788
temp_impf = temp_impf_set.get_func(haz_type=haz_type)[impf_idx]
744789

745790
ax.plot(
746791
temp_impf.intensity,
747792
temp_impf.mdd * temp_impf.paa * 100,
748-
color="grey",
749-
alpha=0.4,
793+
color=color_impf,
794+
alpha=alpha_impf,
795+
zorder=zorder_impf,
750796
label=label_temp,
751797
)
752798

799+
handles, _ = ax.get_legend_handles_labels()
800+
ax.set(
801+
xlabel=f"Intensity ({self.input.hazard.units})",
802+
ylabel="Mean Damage Ratio (MDR)",
803+
xlim=(min(best_impf.intensity), max(best_impf.intensity)),
804+
)
805+
ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=100))
806+
753807
# Plot hazard intensity value distributions
754808
if plot_haz:
755809
haz_vals = self.input.hazard.intensity[
756810
:, self.input.exposure.gdf[f"centr_{haz_type}"]
757-
]
811+
].data
758812

759813
# Plot defaults
760-
color_hist = plot_hist_kws.pop("color", "tab:orange")
761-
alpha_hist = plot_hist_kws.pop("alpha", 0.3)
762814
bins = plot_hist_kws.pop("bins", 40)
763-
label = plot_hist_kws.pop("label", "Hazard intensity\noccurence")
815+
label_hist = plot_hist_kws.pop("label", "Hazard Intensity")
816+
color_hist = plot_hist_kws.pop("color", colors[2])
817+
color_axv = plot_axv_kws.pop("color", colors[3])
818+
alpha_axv = plot_axv_kws.pop("alpha", 0.5)
764819

765820
# Histogram plot
766821
ax2 = ax.twinx()
822+
ax.set_facecolor("none")
823+
ax.set_zorder(2)
824+
ax2.set_zorder(1)
825+
ax2.axvspan(
826+
haz_vals.min(), haz_vals.max(), color=color_axv, alpha=alpha_axv
827+
)
767828
ax2.hist(
768-
haz_vals.data,
829+
haz_vals,
769830
bins=bins,
770831
color=color_hist,
771-
alpha=alpha_hist,
772-
label=label,
832+
label=label_hist,
773833
**plot_hist_kws,
774834
)
775-
ax2.set(ylabel="Hazard intensity occurence (#Exposure points)")
776-
ax.axvline(
777-
x=haz_vals.max(), label="Maximum hazard value", color="tab:orange"
778-
)
779-
ax2.legend(loc="lower right")
835+
ax2.set_ylabel("Exposure Points", color=color_hist)
780836

781-
ax.set(
782-
xlabel=f"Intensity ({self.input.hazard.units})",
783-
ylabel="Mean Damage Ratio (MDR) in %",
784-
xlim=(min(best_impf.intensity), max(best_impf.intensity)),
785-
)
786-
ax.legend()
837+
handles = handles + [
838+
mpatches.Patch(color=color_hist, label=label_hist),
839+
mpatches.Patch(color=color_axv, label=f"{label_hist} Range"),
840+
]
841+
ax.yaxis.label.set_color(color_opt)
842+
ax.tick_params(axis="y", colors=color_opt)
843+
ax2.tick_params(axis="y", colors=color_hist)
844+
845+
ax.legend(handles=handles)
787846
axes.append(ax)
788847

789848
if n_impf > 1:

0 commit comments

Comments
 (0)