Commit 74106b9

New plotting routines (#190)
* Added new functions that compute and plot 95% and 97.5% confidence ranges, as well as p values for CSEP consistency tests.
1 parent d13f74f commit 74106b9

File tree

1 file changed

+246 -0 lines changed

csep/utils/plots.py

Lines changed: 246 additions & 0 deletions
@@ -1883,3 +1883,249 @@ def add_labels_for_publication(figure, style='bssa', labelsize=16):
            ax.annotate(f'({annot})', (0.025, 1.025), xycoords='axes fraction', fontsize=labelsize)

    return


def plot_consistency_test(eval_results, normalize=False, one_sided_lower=True, plot_args=None, variance=None):
    """ Plots results from CSEP1 tests following the CSEP1 convention.

    Note: All of the evaluations should be from the same type of evaluation, otherwise the results will not be
          comparable on the same figure.

    Args:
        eval_results (list): Contains the test results :class:`csep.core.evaluations.EvaluationResult` (see note above)
        normalize (bool): select this if the forecast likelihood should be normalized by the observed likelihood.
            Useful for plotting simulation-based tests.
        one_sided_lower (bool): select this if the plot should be for a one-sided test
        plot_args (dict): optional dictionary of plotting arguments, with keys as strings and items as described below
        variance (float): variance of the negative binomial distribution; required when a result's test_distribution
            is given as ['negative_binomial', mean]

    Optional plotting arguments:
        * figsize: (:class:`list`/:class:`tuple`) - default: (7, 8)
        * title: (:class:`str`) - default: name of the first evaluation result type
        * title_fontsize: (:class:`float`) Fontsize of the plot title - default: 10
        * xlabel: (:class:`str`) - default: 'X'
        * xlabel_fontsize: (:class:`float`) - default: 10
        * xticks_fontsize: (:class:`float`) - default: 10
        * ylabel_fontsize: (:class:`float`) - default: 10
        * color: (:class:`float`/:class:`None`) If None, sets it to red/green according to :func:`_get_marker_style` - default: 'black'
        * linewidth: (:class:`float`) - default: 1.5
        * capsize: (:class:`float`) - default: 4
        * hbars: (:class:`bool`) Flag to draw horizontal bars for each model - default: True
        * tight_layout: (:class:`bool`) Set matplotlib.figure.tight_layout to remove excess blank space in the plot - default: True
        * percentile: (:class:`float`) Width of the plotted confidence interval, in percent - default: 95

    Returns:
        ax (:class:`matplotlib.pyplot.axes` object)
    """

    try:
        results = list(eval_results)
    except TypeError:
        results = [eval_results]
    results.reverse()
    # Parse plot arguments. More can be added here
    if plot_args is None:
        plot_args = {}
    figsize = plot_args.get('figsize', (7, 8))
    xlabel = plot_args.get('xlabel', 'X')
    xlabel_fontsize = plot_args.get('xlabel_fontsize', None)
    xticks_fontsize = plot_args.get('xticks_fontsize', None)
    ylabel_fontsize = plot_args.get('ylabel_fontsize', None)
    color = plot_args.get('color', 'black')
    linewidth = plot_args.get('linewidth', None)
    capsize = plot_args.get('capsize', 4)
    hbars = plot_args.get('hbars', True)
    tight_layout = plot_args.get('tight_layout', True)
    percentile = plot_args.get('percentile', 95)

    fig, ax = plt.subplots(figsize=figsize)
    xlims = []

    for index, res in enumerate(results):
        # handle analytical distributions first, they are all in the form ['name', parameters].
        if res.test_distribution[0] == 'poisson':
            plow = scipy.stats.poisson.ppf((1 - percentile/100.)/2., res.test_distribution[1])
            phigh = scipy.stats.poisson.ppf(1 - (1 - percentile/100.)/2., res.test_distribution[1])
            observed_statistic = res.observed_statistic

        elif res.test_distribution[0] == 'negative_binomial':
            # convert the (mean, variance) description into scipy's nbinom (n, p) parameters
            var = variance
            observed_statistic = res.observed_statistic
            mean = res.test_distribution[1]
            upsilon = 1.0 - ((var - mean) / var)
            tau = (mean**2 / (var - mean))
            plow = nbinom.ppf((1 - percentile/100.)/2., tau, upsilon)
            phigh = nbinom.ppf(1 - (1 - percentile/100.)/2., tau, upsilon)

        # empirical distributions
        else:
            if normalize:
                test_distribution = numpy.array(res.test_distribution) - res.observed_statistic
                observed_statistic = 0
            else:
                test_distribution = numpy.array(res.test_distribution)
                observed_statistic = res.observed_statistic
            # compute the confidence range depending on the type of test
            if one_sided_lower:
                plow = numpy.percentile(test_distribution, 5)
                phigh = numpy.percentile(test_distribution, 100)
            else:
                plow = numpy.percentile(test_distribution, 2.5)
                phigh = numpy.percentile(test_distribution, 97.5)

        if not numpy.isinf(observed_statistic):  # check that the test result does not diverge
            low = observed_statistic - plow
            high = phigh - observed_statistic
            ax.errorbar(observed_statistic, index, xerr=numpy.array([[low, high]]).T,
                        fmt=_get_marker_style(observed_statistic, (plow, phigh), one_sided_lower),
                        capsize=capsize, linewidth=linewidth, ecolor=color, markersize=10, zorder=1)
            # determine the limits to use
            xlims.append((plow, phigh, observed_statistic))
            # only extend the distribution when the observed statistic falls outside it in the acceptable tail
            if one_sided_lower:
                if observed_statistic >= plow and phigh < observed_statistic:
                    # draw dashed line to infinity
                    xt = numpy.linspace(phigh, 99999, 100)
                    yt = numpy.ones(100) * index
                    ax.plot(xt, yt, linestyle='--', linewidth=linewidth, color=color)

        else:
            print('Observed statistic diverges for forecast %s, index %i.'
                  ' Check for zero-valued bins within the forecast' % (res.sim_name, index))
            ax.barh(index, 99999, left=-10000, height=1, color=['red'], alpha=0.5)

    try:
        ax.set_xlim(*_get_axis_limits(xlims))
    except ValueError:
        raise ValueError('All EvaluationResults have infinite observed_statistics')
    ax.set_yticks(numpy.arange(len(results)))
    ax.set_yticklabels([res.sim_name for res in results], fontsize=14)
    ax.set_ylim([-0.5, len(results) - 0.5])
    if hbars:
        yTickPos = ax.get_yticks()
        if len(yTickPos) >= 2:
            ax.barh(yTickPos, numpy.array([99999] * len(yTickPos)), left=-10000,
                    height=(yTickPos[1] - yTickPos[0]), color=['w', 'gray'], alpha=0.2, zorder=0)
    ax.set_xlabel(xlabel, fontsize=14)
    ax.tick_params(axis='x', labelsize=13)
    if tight_layout:
        ax.figure.tight_layout()
        fig.tight_layout()
    return ax


def plot_pvalues_and_intervals(test_results, ax, var=None):
    """ Plots p-values and 97.5% confidence intervals for a list of consistency test results on an existing
        axes object, e.g. the one returned by :func:`plot_consistency_test`.

    Args:
        test_results (list): list of :class:`csep.core.evaluations.EvaluationResult`
        ax (:class:`matplotlib.pyplot.axes`): axes object to draw the markers and intervals on
        var (float): variance of the negative binomial distribution; required for the NBD N-Test

    Returns:
        ax (:class:`matplotlib.pyplot.axes` object)
    """

    variance = var
    percentile = 97.5
    p_values = []

    # N-tests use two-sided p-values, so they get their own legend and p-value bins
    if test_results[0].name == 'NBD N-Test' or test_results[0].name == 'Poisson N-Test':
        legend_elements = [
            Line2D([0], [0], marker='o', color='red', lw=0, label=r'p < 10e-5', markersize=10, markeredgecolor='k'),
            Line2D([0], [0], marker='o', color='#FF7F50', lw=0, label=r'10e-5 $\leq$ p < 10e-4', markersize=10, markeredgecolor='k'),
            Line2D([0], [0], marker='o', color='gold', lw=0, label=r'10e-4 $\leq$ p < 10e-3', markersize=10, markeredgecolor='k'),
            Line2D([0], [0], marker='o', color='white', lw=0, label=r'10e-3 $\leq$ p < 0.0125', markersize=10, markeredgecolor='k'),
            Line2D([0], [0], marker='o', color='skyblue', lw=0, label=r'0.0125 $\leq$ p < 0.025', markersize=10, markeredgecolor='k'),
            Line2D([0], [0], marker='o', color='blue', lw=0, label=r'p $\geq$ 0.025', markersize=10, markeredgecolor='k')]

        ax.legend(handles=legend_elements, loc=4, fontsize=13, edgecolor='k')

        if test_results[0].name == 'NBD N-Test':
            for i in range(len(test_results)):
                mean = test_results[i].test_distribution[1]
                upsilon = 1.0 - ((variance - mean) / variance)
                tau = (mean**2 / (variance - mean))
                plow97 = nbinom.ppf((1 - percentile/100.)/2., tau, upsilon)
                phigh97 = nbinom.ppf(1 - (1 - percentile/100.)/2., tau, upsilon)
                low97 = test_results[i].observed_statistic - plow97
                high97 = phigh97 - test_results[i].observed_statistic
                ax.errorbar(test_results[i].observed_statistic, (len(test_results) - 1) - i,
                            xerr=numpy.array([[low97, high97]]).T, capsize=4, color='slategray', alpha=1.0, zorder=0)
                p_values.append(test_results[i].quantile[1] * 2.0)  # two-sided p-values, following Meletti et al. (2021)

                # color the observed statistic according to its p-value bin
                if p_values[i] < 10e-5:
                    ax.plot(test_results[i].observed_statistic, (len(test_results) - 1) - i, marker='o', color='red', markersize=8, zorder=2)
                if p_values[i] >= 10e-5 and p_values[i] < 10e-4:
                    ax.plot(test_results[i].observed_statistic, (len(test_results) - 1) - i, marker='o', color='#FF7F50', markersize=8, zorder=2)
                if p_values[i] >= 10e-4 and p_values[i] < 10e-3:
                    ax.plot(test_results[i].observed_statistic, (len(test_results) - 1) - i, marker='o', color='gold', markersize=8, zorder=2)
                if p_values[i] >= 10e-3 and p_values[i] < 0.0125:
                    ax.plot(test_results[i].observed_statistic, (len(test_results) - 1) - i, marker='o', color='white', markersize=8, zorder=2)
                if p_values[i] >= 0.0125 and p_values[i] < 0.025:
                    ax.plot(test_results[i].observed_statistic, (len(test_results) - 1) - i, marker='o', color='skyblue', markersize=8, zorder=2)
                if p_values[i] >= 0.025:
                    ax.plot(test_results[i].observed_statistic, (len(test_results) - 1) - i, marker='o', color='blue', markersize=8, zorder=2)

        if test_results[0].name == 'Poisson N-Test':
            for i in range(len(test_results)):
                plow97 = scipy.stats.poisson.ppf((1 - percentile/100.)/2., test_results[i].test_distribution[1])
                phigh97 = scipy.stats.poisson.ppf(1 - (1 - percentile/100.)/2., test_results[i].test_distribution[1])
                low97 = test_results[i].observed_statistic - plow97
                high97 = phigh97 - test_results[i].observed_statistic
                ax.errorbar(test_results[i].observed_statistic, (len(test_results) - 1) - i,
                            xerr=numpy.array([[low97, high97]]).T, capsize=4, color='slategray', alpha=1.0, zorder=0)
                p_values.append(test_results[i].quantile[1] * 2.0)

                if p_values[i] < 10e-5:
                    ax.plot(test_results[i].observed_statistic, (len(test_results) - 1) - i, marker='o', color='red', markersize=8, zorder=2)
                if p_values[i] >= 10e-5 and p_values[i] < 10e-4:
                    ax.plot(test_results[i].observed_statistic, (len(test_results) - 1) - i, marker='o', color='#FF7F50', markersize=8, zorder=2)
                if p_values[i] >= 10e-4 and p_values[i] < 10e-3:
                    ax.plot(test_results[i].observed_statistic, (len(test_results) - 1) - i, marker='o', color='gold', markersize=8, zorder=2)
                if p_values[i] >= 10e-3 and p_values[i] < 0.0125:
                    ax.plot(test_results[i].observed_statistic, (len(test_results) - 1) - i, marker='o', color='white', markersize=8, zorder=2)
                if p_values[i] >= 0.0125 and p_values[i] < 0.025:
                    ax.plot(test_results[i].observed_statistic, (len(test_results) - 1) - i, marker='o', color='skyblue', markersize=8, zorder=2)
                if p_values[i] >= 0.025:
                    ax.plot(test_results[i].observed_statistic, (len(test_results) - 1) - i, marker='o', color='blue', markersize=8, zorder=2)

    # all other consistency tests use one-sided p-values and the empirical test distribution
    else:
        for i in range(len(test_results)):
            plow97 = numpy.percentile(test_results[i].test_distribution, 2.5)
            phigh97 = numpy.percentile(test_results[i].test_distribution, 100)
            low97 = test_results[i].observed_statistic - plow97
            high97 = phigh97 - test_results[i].observed_statistic
            ax.errorbar(test_results[i].observed_statistic, (len(test_results) - 1) - i,
                        xerr=numpy.array([[low97, high97]]).T, capsize=4, color='slategray', alpha=1.0, zorder=0)
            p_values.append(test_results[i].quantile)

            if p_values[i] < 10e-5:
                ax.plot(test_results[i].observed_statistic, (len(test_results) - 1) - i, marker='o', color='red', markersize=8, zorder=2)
            if p_values[i] >= 10e-5 and p_values[i] < 10e-4:
                ax.plot(test_results[i].observed_statistic, (len(test_results) - 1) - i, marker='o', color='#FF7F50', markersize=8, zorder=2)
            if p_values[i] >= 10e-4 and p_values[i] < 10e-3:
                ax.plot(test_results[i].observed_statistic, (len(test_results) - 1) - i, marker='o', color='gold', markersize=8, zorder=2)
            if p_values[i] >= 10e-3 and p_values[i] < 0.025:
                ax.plot(test_results[i].observed_statistic, (len(test_results) - 1) - i, marker='o', color='white', markersize=8, zorder=2)
            if p_values[i] >= 0.025 and p_values[i] < 0.05:
                ax.plot(test_results[i].observed_statistic, (len(test_results) - 1) - i, marker='o', color='skyblue', markersize=8, zorder=2)
            if p_values[i] >= 0.05:
                ax.plot(test_results[i].observed_statistic, (len(test_results) - 1) - i, marker='o', color='blue', markersize=8, zorder=2)

        legend_elements = [
            Line2D([0], [0], marker='o', color='red', lw=0, label=r'p < 10e-5', markersize=10, markeredgecolor='k'),
            Line2D([0], [0], marker='o', color='#FF7F50', lw=0, label=r'10e-5 $\leq$ p < 10e-4', markersize=10, markeredgecolor='k'),
            Line2D([0], [0], marker='o', color='gold', lw=0, label=r'10e-4 $\leq$ p < 10e-3', markersize=10, markeredgecolor='k'),
            Line2D([0], [0], marker='o', color='white', lw=0, label=r'10e-3 $\leq$ p < 0.025', markersize=10, markeredgecolor='k'),
            Line2D([0], [0], marker='o', color='skyblue', lw=0, label=r'0.025 $\leq$ p < 0.05', markersize=10, markeredgecolor='k'),
            Line2D([0], [0], marker='o', color='blue', lw=0, label=r'p $\geq$ 0.05', markersize=10, markeredgecolor='k')]

        ax.legend(handles=legend_elements, loc=4, fontsize=13, edgecolor='k')

    return ax
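
For orientation, a minimal usage sketch of the two new routines (not part of the commit; `forecasts` and `catalog` are placeholder names, and it assumes the forecasts are gridded forecasts evaluated with the Poisson number test from csep.core.poisson_evaluations, so each result is named 'Poisson N-Test'):

# Hypothetical example: evaluate several gridded forecasts with the Poisson N-test
# and combine both new plotting routines on one figure.
import matplotlib.pyplot as plt
from csep.core import poisson_evaluations
from csep.utils import plots

# `forecasts` (list of gridded forecasts) and `catalog` (observed catalog) are assumed to exist.
results = [poisson_evaluations.number_test(forecast, catalog) for forecast in forecasts]

# 95% confidence ranges per forecast; the N-test is two-sided, so one_sided_lower=False.
ax = plots.plot_consistency_test(results, one_sided_lower=False,
                                 plot_args={'xlabel': 'Number of earthquakes', 'percentile': 95})

# Overlay 97.5% intervals and color-coded p-values on the same axes.
plots.plot_pvalues_and_intervals(results, ax)
plt.savefig('n_test_consistency.png', dpi=300)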

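A side note on the negative-binomial branches above: tau and upsilon are simply scipy's nbinom (n, p) parameters recovered from a target mean and variance (upsilon reduces to mean/var), which can be checked numerically; the numbers below are arbitrary and only illustrative.

# Illustrative check of the mean/variance -> (tau, upsilon) conversion used above.
from scipy.stats import nbinom

mean, var = 20.0, 50.0               # any values with var > mean (over-dispersion)
tau = mean**2 / (var - mean)         # scipy's n
upsilon = 1.0 - (var - mean) / var   # scipy's p, equal to mean / var

print(nbinom.mean(tau, upsilon), nbinom.var(tau, upsilon))   # -> 20.0, 50.0
print(nbinom.ppf([0.025, 0.975], tau, upsilon))              # 95% confidence range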