Commit d6ea660
bayonato89 and wsavran committed
Non poissonian tests (SCECcode#189)

* Changed np for numpy in the poisson_evaluations.py file
* Added non-Poissonian comparative and consistency tests.
* format docstrings
* Rename non_poissonian_evaluations.py to binomial_evaluations.py
* Update binomial_evaluations.py: removed unused imports

Co-authored-by: wsavran <35315438+wsavran@users.noreply.github.com>
1 parent 0ef85ed commit d6ea660

File tree

1 file changed: +372 −0 lines changed

csep/core/binomial_evaluations.py

Lines changed: 372 additions & 0 deletions
@@ -0,0 +1,372 @@
import numpy
import scipy.stats
import scipy.spatial

from csep.models import EvaluationResult
from csep.core.exceptions import CSEPCatalogException
# _simulate_catalog is called by _binary_likelihood_test below but is not
# defined in this file; it is provided by the Poisson evaluations module.
from csep.core.poisson_evaluations import _simulate_catalog


def _nbd_number_test_ndarray(fore_cnt, obs_cnt, variance, epsilon=1e-6):
    """ Computes delta1 and delta2 values from the Negative Binomial (NBD) number test.

    Args:
        fore_cnt (float): parameter of the negative binomial distribution coming from the expected value of the forecast
        obs_cnt (float): count of earthquakes observed during the testing period
        variance (float): variance parameter of the negative binomial distribution coming from a historical catalog.
            A variance value of approximately 23541 has been calculated using M5.95+ earthquakes observed worldwide from
            1982 to 2013.
        epsilon (float): tolerance level to satisfy the requirements of the two-sided p-value

    Returns:
        result (tuple): (delta1, delta2)
    """
    var = variance
    mean = fore_cnt
    upsilon = 1.0 - ((var - mean) / var)
    tau = (mean ** 2 / (var - mean))

    delta1 = 1.0 - scipy.stats.nbinom.cdf(obs_cnt - epsilon, tau, upsilon, loc=0)
    delta2 = scipy.stats.nbinom.cdf(obs_cnt + epsilon, tau, upsilon, loc=0)

    return delta1, delta2
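As a quick sanity check of the helper, here is a minimal sketch; the forecast and observed counts are made-up illustration values, and only the variance of roughly 23541 comes from the docstring above:

fore_cnt = 15.2   # hypothetical expected event count from a forecast
obs_cnt = 18      # hypothetical observed event count
delta1, delta2 = _nbd_number_test_ndarray(fore_cnt, obs_cnt, variance=23541)
print(delta1, delta2)  # delta1: P(X >= obs_cnt), delta2: P(X <= obs_cnt)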
def negative_binomial_number_test(gridded_forecast, observed_catalog, variance):
    """ Computes the negative binomial N-test on a gridded forecast.

    Computes the Number (N) test for observed and forecasted catalogs. Both data sets are expected to be in terms of
    event counts. We find the total number of events in the observed catalog and the forecast, which are then used to
    compute the probabilities of
    (i) at least the observed number of events (delta 1), and
    (ii) at most the observed number of events (delta 2), assuming the negative binomial distribution.

    Args:
        gridded_forecast: Forecast of a model (gridded) (numpy array).
            A forecast has to be in terms of the average number of events in each bin.
            It can be any value greater than zero.
        observed_catalog: Observed (gridded) seismicity (numpy array).
            An observation has to be the number of events in each bin.
            It has to be either zero or a positive integer (no floating point).
        variance: Variance parameter of the negative binomial distribution obtained from a historical catalog.

    Returns:
        out (tuple): (delta_1, delta_2)
    """
    result = EvaluationResult()

    # observed count
    obs_cnt = observed_catalog.event_count

    # forecasts provide the expected number of events during the time horizon of the forecast
    fore_cnt = gridded_forecast.event_count

    epsilon = 1e-6

    # stores the actual result of the number test
    delta1, delta2 = _nbd_number_test_ndarray(fore_cnt, obs_cnt, variance, epsilon=epsilon)

    # store results
    result.test_distribution = ('negative_binomial', fore_cnt)
    result.name = 'NBD N-Test'
    result.observed_statistic = obs_cnt
    result.quantile = (delta1, delta2)
    result.sim_name = gridded_forecast.name
    result.obs_name = observed_catalog.name
    result.status = 'normal'
    result.min_mw = numpy.min(gridded_forecast.magnitudes)

    return result
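A minimal usage sketch for the test itself; the forecast path and the catalog query window are hypothetical placeholders, not part of this commit:

import csep

forecast = csep.load_gridded_forecast('path/to/forecast.dat')  # hypothetical path
catalog = csep.query_comcat(forecast.start_time, forecast.end_time)  # hypothetical window
result = negative_binomial_number_test(forecast, catalog, variance=23541)
print(result.quantile)  # (delta1, delta2)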
def binary_joint_log_likelihood_ndarray(forecast, catalog):
    """ Computes Bernoulli log-likelihood scores, assuming that earthquakes follow a binomial distribution.

    Args:
        forecast: Forecast of a model (gridded) (numpy array).
            A forecast has to be in terms of the average number of events in each bin.
            It can be any value greater than zero.
        catalog: Observed (gridded) seismicity (numpy array).
            An observation has to be the number of events in each bin.
            It has to be either zero or a positive integer (no floating point).
    """
    # First, we mask the forecast in cells where we could find log=0.0 singularities:
    forecast_masked = numpy.ma.masked_where(forecast.ravel() <= 0.0, forecast.ravel())

    # Then, we compute the log-likelihood of observing one or more events given a Poisson distribution, i.e., 1 - Pr(0):
    target_idx = numpy.nonzero(catalog.ravel())
    y = numpy.zeros(forecast_masked.ravel().shape)
    y[target_idx[0]] = 1
    first_term = y * (numpy.log(1.0 - numpy.exp(-forecast_masked.ravel())))

    # Also, we estimate the log-likelihood in cells where no events are observed:
    second_term = (1 - y) * (-forecast_masked.ravel().data)
    # Finally, we sum both terms to compute the joint log-likelihood score:
    return numpy.sum(first_term.data + second_term.data)
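A self-contained numerical sketch of the score on a tiny four-cell grid; the rates and counts are illustrative values only (all rates are kept positive here so the masking branch stays inactive):

rates = numpy.array([0.1, 0.5, 0.3, 2.0])   # hypothetical forecast rates per cell
counts = numpy.array([0, 1, 0, 3])          # hypothetical observed counts per cell
ll = binary_joint_log_likelihood_ndarray(rates, counts)
# cells with one or more events contribute log(1 - exp(-rate));
# empty cells contribute -rate
print(ll)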
def _binary_likelihood_test(forecast_data, observed_data, num_simulations=1000, random_numbers=None,
                            seed=None, use_observed_counts=True, verbose=True, normalize_likelihood=False):
    """ Computes the binary conditional likelihood test from CSEP using an efficient simulation-based approach.

    Args:
        forecast_data (numpy.ndarray): nd array where [:, -1] are the magnitude bins.
        observed_data (numpy.ndarray): same format as forecast_data.
        num_simulations: default number of simulations to use for likelihood-based simulations
        seed: used for reproducibility of the prng
        random_numbers (numpy.ndarray): can supply an explicit list of random numbers, primarily used for software testing
        use_observed_counts (bool): if true, will simulate catalogs using the observed events; if false, will draw from a
            Poisson distribution
        normalize_likelihood (bool): if true, scale the forecast rates so that the total forecast count matches the
            observed count (used together with use_observed_counts)
    """
    # array-masking that avoids log singularities:
    forecast_data = numpy.ma.masked_where(forecast_data <= 0.0, forecast_data)

    # set seed for the likelihood test
    if seed is not None:
        numpy.random.seed(seed)

    # used to determine where a simulated earthquake should be placed; by definition of cumsum these are sorted
    sampling_weights = numpy.cumsum(forecast_data.ravel()) / numpy.sum(forecast_data)

    # data structures to store results
    sim_fore = numpy.zeros(sampling_weights.shape)
    simulated_ll = []
    n_obs = len(numpy.unique(numpy.nonzero(observed_data.ravel())))
    n_fore = numpy.sum(forecast_data)
    expected_forecast_count = int(n_obs)

    if use_observed_counts and normalize_likelihood:
        scale = n_obs / n_fore
        expected_forecast_count = int(n_obs)
        forecast_data = scale * forecast_data

    # main simulation step in this loop
    for idx in range(num_simulations):
        if use_observed_counts:
            num_events_to_simulate = int(n_obs)
        else:
            num_events_to_simulate = int(numpy.random.poisson(expected_forecast_count))

        if random_numbers is None:
            sim_fore = _simulate_catalog(num_events_to_simulate, sampling_weights, sim_fore)
        else:
            sim_fore = _simulate_catalog(num_events_to_simulate, sampling_weights, sim_fore,
                                         random_numbers=random_numbers[idx, :])

        # compute joint log-likelihood
        current_ll = binary_joint_log_likelihood_ndarray(forecast_data.data, sim_fore)

        # append to list of simulated log-likelihoods
        simulated_ll.append(current_ll)

        # just be verbose
        if verbose:
            if (idx + 1) % 100 == 0:
                print(f'... {idx + 1} catalogs simulated.')

    # observed joint log-likelihood
    obs_ll = binary_joint_log_likelihood_ndarray(forecast_data.data, observed_data)

    # quantile score
    qs = numpy.sum(numpy.array(simulated_ll) <= obs_ll) / num_simulations

    # float, float, list
    return qs, obs_ll, simulated_ll
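A small synthetic run of the simulation loop, as a sketch only; the rates and counts are made-up values, and it assumes `_simulate_catalog` resolves via the import noted at the top of the file:

rates = numpy.array([0.2, 1.0, 0.5, 3.0])   # hypothetical forecast rates
counts = numpy.array([0, 1, 0, 2])          # hypothetical observed counts
qs, obs_ll, sim_ll = _binary_likelihood_test(rates, counts, num_simulations=100,
                                             seed=0, use_observed_counts=True,
                                             verbose=False, normalize_likelihood=True)
print(qs)  # fraction of simulated catalogs with log-likelihood <= the observed one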
def binary_spatial_test(gridded_forecast, observed_catalog, num_simulations=1000, seed=None, random_numbers=None, verbose=False):
    """ Performs the binary spatial test on the forecast using the observed catalog.

    Note: The forecast and the observations should be scaled to the same time period before calling this function. This
    increases transparency, as no assumptions are made about the length of the forecasts. This is particularly important
    for gridded forecasts that supply their forecasts as rates.

    Args:
        gridded_forecast: csep.core.forecasts.GriddedForecast
        observed_catalog: csep.core.catalogs.Catalog
        num_simulations (int): number of simulations used to compute the quantile score
        seed (int): used for reproducibility and testing
        random_numbers (numpy.ndarray): random numbers used to override the random number generation; injection point for
            testing

    Returns:
        evaluation_result: csep.core.evaluations.EvaluationResult
    """
    # grid catalog onto spatial grid
    gridded_catalog_data = observed_catalog.spatial_counts()

    # simply call the likelihood test on the catalog data and forecast
    qs, obs_ll, simulated_ll = _binary_likelihood_test(gridded_forecast.spatial_counts(), gridded_catalog_data,
                                                       num_simulations=num_simulations,
                                                       seed=seed,
                                                       random_numbers=random_numbers,
                                                       use_observed_counts=True,
                                                       verbose=verbose, normalize_likelihood=True)

    # populate result data structure
    result = EvaluationResult()
    result.test_distribution = simulated_ll
    result.name = 'Binary S-Test'
    result.observed_statistic = obs_ll
    result.quantile = qs
    result.sim_name = gridded_forecast.name
    result.obs_name = observed_catalog.name
    result.status = 'normal'
    try:
        result.min_mw = numpy.min(gridded_forecast.magnitudes)
    except AttributeError:
        result.min_mw = -1
    return result
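Usage follows the same pattern as the number test above; the forecast and catalog objects here are the hypothetical placeholders from the earlier sketch:

result = binary_spatial_test(forecast, catalog, num_simulations=1000, seed=42)
print(result.quantile)  # quantile score of the observed spatial likelihood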
def binary_conditional_likelihood_test(gridded_forecast, observed_catalog, num_simulations=1000, seed=None, random_numbers=None, verbose=False):
    """ Performs the binary conditional likelihood test on a gridded forecast using an observed catalog.

    Normalizes the forecast so the forecasted rates are consistent with the observations. This modification eliminates
    the strong impact that differences in the number distribution have on the forecasted rates.

    Note: The forecast and the observations should be scaled to the same time period before calling this function. This
    increases transparency, as no assumptions are made about the length of the forecasts. This is particularly important
    for gridded forecasts that supply their forecasts as rates.

    Args:
        gridded_forecast: csep.core.forecasts.GriddedForecast
        observed_catalog: csep.core.catalogs.Catalog
        num_simulations (int): number of simulations used to compute the quantile score
        seed (int): used for reproducibility and testing
        random_numbers (numpy.ndarray): random numbers used to override the random number generation; injection point for
            testing

    Returns:
        evaluation_result: csep.core.evaluations.EvaluationResult
    """
    # grid catalog onto spatial grid
    try:
        _ = observed_catalog.region.magnitudes
    except CSEPCatalogException:
        observed_catalog.region = gridded_forecast.region
    gridded_catalog_data = observed_catalog.spatial_magnitude_counts()

    # simply call the likelihood test on the catalog data and forecast
    qs, obs_ll, simulated_ll = _binary_likelihood_test(gridded_forecast.data, gridded_catalog_data,
                                                       num_simulations=num_simulations, seed=seed,
                                                       random_numbers=random_numbers,
                                                       use_observed_counts=True,
                                                       verbose=verbose, normalize_likelihood=False)

    # populate result data structure
    result = EvaluationResult()
    result.test_distribution = simulated_ll
    result.name = 'Binary CL-Test'
    result.observed_statistic = obs_ll
    result.quantile = qs
    result.sim_name = gridded_forecast.name
    result.obs_name = observed_catalog.name
    result.status = 'normal'
    result.min_mw = numpy.min(gridded_forecast.magnitudes)

    return result
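The conditional test is called the same way, but it bins the catalog by space and magnitude, so the catalog needs a region (assigned from the forecast if missing, as in the try/except above). A brief sketch reusing the same hypothetical objects:

catalog.region = forecast.region  # only needed if the catalog has no region of its own
result = binary_conditional_likelihood_test(forecast, catalog, num_simulations=1000, seed=42)
print(result.name, result.quantile)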
def matrix_binary_t_test(target_event_rates1, target_event_rates2, n_obs, n_f1, n_f2, catalog, alpha=0.05):
    """ Computes the binary t-test statistic by comparing two target event rate distributions.

    We compare the forecast from Model 1 with the forecast from Model 2. The Information Gain per Active Bin (IGPA) is
    computed, which is then used to compute the t-statistic. A confidence interval for the information gain can be
    computed using t_critical. For a complete explanation see Rhoades, D. A., et al. (2011). Efficient testing of
    earthquake forecasting models. Acta Geophysica, 59(4), 728-747. doi:10.2478/s11600-011-0013-5, and Bayona, J. A.,
    et al. (2022). Prospective evaluation of multiplicative hybrid earthquake forecasting models in California.
    doi:10.1093/gji/ggac018.

    Args:
        target_event_rates1 (numpy.ndarray): nd-array storing target event rates
        target_event_rates2 (numpy.ndarray): nd-array storing target event rates
        n_obs (float, int, numpy.ndarray): number of observed earthquakes, should be a whole number and >= zero.
        n_f1 (float): total number of earthquakes forecasted by Model 1
        n_f2 (float): total number of earthquakes forecasted by Model 2
        catalog: csep.core.catalogs.Catalog
        alpha (float): tolerance level for the type-I error rate of the statistical test

    Returns:
        out (dict): relevant statistics from the t-test
    """
    # some pre-calculations, because they are used repeatedly
    N_p = n_obs
    N = len(numpy.unique(numpy.nonzero(catalog.spatial_magnitude_counts().ravel())))  # number of active bins
    N1 = n_f1
    N2 = n_f2
    X1 = numpy.log(target_event_rates1)  # log of every element of forecast 1
    X2 = numpy.log(target_event_rates2)  # log of every element of forecast 2

    # information gain, using Equation (17) of Rhoades et al. (2011)
    information_gain = (numpy.sum(X1 - X2) - (N1 - N2)) / N

    # compute the variance of (X1 - X2) using Equation (18) of Rhoades et al. (2011)
    first_term = (numpy.sum(numpy.power((X1 - X2), 2))) / (N - 1)
    second_term = numpy.power(numpy.sum(X1 - X2), 2) / (numpy.power(N, 2) - N)
    forecast_variance = first_term - second_term

    forecast_std = numpy.sqrt(forecast_variance)
    t_statistic = information_gain / (forecast_std / numpy.sqrt(N))

    # obtain the critical value of t from the t-distribution (two-tailed, so use alpha / 2)
    df = N - 1
    t_critical = scipy.stats.t.ppf(1 - (alpha / 2), df)

    # compute the confidence interval of the information gain
    ig_lower = information_gain - (t_critical * forecast_std / numpy.sqrt(N))
    ig_upper = information_gain + (t_critical * forecast_std / numpy.sqrt(N))

    # If the t-value is greater than t-critical, both the lower and upper confidence interval limits will be greater
    # than zero, which means that forecasting Model 1 is better than forecasting Model 2.
    return {'t_statistic': t_statistic,
            't_critical': t_critical,
            'information_gain': information_gain,
            'ig_lower': ig_lower,
            'ig_upper': ig_upper}
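A self-contained numerical sketch of the t-test arithmetic, using made-up rates and a minimal stand-in object for the catalog (only `spatial_magnitude_counts` is needed here, so a SimpleNamespace suffices):

from types import SimpleNamespace

counts = numpy.array([1, 0, 2, 0, 1])    # hypothetical observed counts; 3 active bins
rates1 = numpy.array([0.8, 1.5, 0.6])    # hypothetical rates of Model 1 in the active bins
rates2 = numpy.array([0.5, 1.0, 0.9])    # hypothetical rates of Model 2 in the active bins
mock_catalog = SimpleNamespace(spatial_magnitude_counts=lambda: counts)
out = matrix_binary_t_test(rates1, rates2, n_obs=4, n_f1=3.2, n_f2=2.9, catalog=mock_catalog)
print(out['information_gain'], out['t_statistic'], out['t_critical'])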
def binary_paired_t_test(forecast, benchmark_forecast, observed_catalog, alpha=0.05, scale=False):
    """ Computes the binary t-test for gridded earthquake forecasts.

    This score is positively oriented, meaning that positive values of the information gain indicate that the
    forecast is performing better than the benchmark forecast.

    Args:
        forecast (csep.core.forecasts.GriddedForecast): nd-array storing gridded rates, axis=-1 should be the magnitude
            column
        benchmark_forecast (csep.core.forecasts.GriddedForecast): nd-array storing gridded rates, axis=-1 should be the
            magnitude column
        observed_catalog (csep.core.catalogs.AbstractBaseCatalog): observed catalog of earthquakes
        alpha (float): tolerance level for the type-I error rate of the statistical test
        scale (bool): if true, scale forecasted rates down to a single day

    Returns:
        evaluation_result: csep.core.evaluations.EvaluationResult
    """
    # needs some pre-processing to put the forecasts in the context that is required for the t-test. this is different
    # for cumulative forecasts (eg, multiple time-horizons) and static file-based forecasts.
    target_event_rate_forecast1p, n_fore1 = forecast.target_event_rates(observed_catalog, scale=scale)
    target_event_rate_forecast2p, n_fore2 = benchmark_forecast.target_event_rates(observed_catalog, scale=scale)

    target_event_rate_forecast1 = forecast.data.ravel()[
        numpy.unique(numpy.nonzero(observed_catalog.spatial_magnitude_counts().ravel()))]
    target_event_rate_forecast2 = benchmark_forecast.data.ravel()[
        numpy.unique(numpy.nonzero(observed_catalog.spatial_magnitude_counts().ravel()))]

    # call the primitive version operating on ndarrays
    out = matrix_binary_t_test(target_event_rate_forecast1, target_event_rate_forecast2, observed_catalog.event_count,
                               n_fore1, n_fore2, observed_catalog, alpha=alpha)

    # storing this for later
    result = EvaluationResult()
    result.name = 'binary paired T-Test'
    result.test_distribution = (out['ig_lower'], out['ig_upper'])
    result.observed_statistic = out['information_gain']
    result.quantile = (out['t_statistic'], out['t_critical'])
    result.sim_name = (forecast.name, benchmark_forecast.name)
    result.obs_name = observed_catalog.name
    result.status = 'normal'
    result.min_mw = numpy.min(forecast.magnitudes)
    return result
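Finally, a usage sketch for the top-level comparison; the forecast file names and query window are hypothetical placeholders:

import csep

forecast_a = csep.load_gridded_forecast('model_a.dat')   # hypothetical file
forecast_b = csep.load_gridded_forecast('model_b.dat')   # hypothetical benchmark
catalog = csep.query_comcat(forecast_a.start_time, forecast_a.end_time)  # hypothetical window
result = binary_paired_t_test(forecast_a, forecast_b, catalog)
# a positive information gain with ig_lower > 0 favors forecast_a over forecast_b
print(result.observed_statistic, result.test_distribution)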
