Skip to content

remove normalization of likelihood on cl-test #117

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 12, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 51 additions & 44 deletions csep/core/poisson_evaluations.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ def paired_t_test(forecast, benchmark_forecast, observed_catalog, alpha=0.05, sc
target_event_rate_forecast2, n_fore2 = benchmark_forecast.target_event_rates(observed_catalog, scale=scale)

# call the primitive version operating on ndarray
out = _t_test_ndarray(target_event_rate_forecast1, target_event_rate_forecast2, observed_catalog.event_count, n_fore1, n_fore2,
alpha=alpha)
out = _t_test_ndarray(target_event_rate_forecast1, target_event_rate_forecast2, observed_catalog.event_count,
n_fore1, n_fore2, alpha=alpha)

# storing this for later
result = EvaluationResult()
Expand Down Expand Up @@ -153,7 +153,8 @@ def number_test(gridded_forecast, observed_catalog):

return result

def conditional_likelihood_test(gridded_forecast, observed_catalog, num_simulations=1000, seed=None, random_numbers=None, verbose=False):
def conditional_likelihood_test(gridded_forecast, observed_catalog, num_simulations=1000, seed=None,
random_numbers=None, verbose=False):
"""Performs the conditional likelihood test on Gridded Forecast using an Observed Catalog.

This test normalizes the forecast so the forecasted rate are consistent with the observations. This modification
Expand Down Expand Up @@ -181,12 +182,14 @@ def conditional_likelihood_test(gridded_forecast, observed_catalog, num_simulati
observed_catalog.region = gridded_forecast.region
gridded_catalog_data = observed_catalog.spatial_magnitude_counts()


# simply call likelihood test on catalog data and forecast
qs, obs_ll, simulated_ll = _poisson_likelihood_test(gridded_forecast.data, gridded_catalog_data,
num_simulations=num_simulations, seed=seed, random_numbers=random_numbers,
num_simulations=num_simulations,
seed=seed,
random_numbers=random_numbers,
use_observed_counts=True,
verbose=verbose)
verbose=verbose,
normalize_likelihood=False)

# populate result data structure
result = EvaluationResult()
Expand Down Expand Up @@ -231,7 +234,7 @@ def poisson_spatial_likelihood(forecast, catalog):
return poll


def _binary_spatial_likelihood(forecast, catalog):
def binary_spatial_likelihood(forecast, catalog):
"""
This function computes log-likelihood scores (bills), using a binary likelihood distribution of earthquakes.
For this aim, we need an input variable 'forecast' and a variable 'catalog'.
Expand Down Expand Up @@ -265,7 +268,8 @@ def _binary_spatial_likelihood(forecast, catalog):

return bill

def magnitude_test(gridded_forecast, observed_catalog, num_simulations=1000, seed=None, random_numbers=None, verbose=False):
def magnitude_test(gridded_forecast, observed_catalog, num_simulations=1000, seed=None, random_numbers=None,
verbose=False):
"""
Performs the Magnitude Test on a Gridded Forecast using an observed catalog.

Expand Down Expand Up @@ -293,7 +297,8 @@ def magnitude_test(gridded_forecast, observed_catalog, num_simulations=1000, see
seed=seed,
random_numbers=random_numbers,
use_observed_counts=True,
verbose=verbose)
verbose=verbose,
normalize_likelihood=True)

# populate result data structure
result = EvaluationResult()
Expand All @@ -308,7 +313,8 @@ def magnitude_test(gridded_forecast, observed_catalog, num_simulations=1000, see

return result

def spatial_test(gridded_forecast, observed_catalog, num_simulations=1000, seed=None, random_numbers=None, verbose=False):
def spatial_test(gridded_forecast, observed_catalog, num_simulations=1000, seed=None, random_numbers=None,
verbose=False):
"""
Performs the Spatial Test on the Forecast using the Observed Catalogs.

Expand Down Expand Up @@ -336,7 +342,8 @@ def spatial_test(gridded_forecast, observed_catalog, num_simulations=1000, seed=
seed=seed,
random_numbers=random_numbers,
use_observed_counts=True,
verbose=verbose)
verbose=verbose,
normalize_likelihood=True)

# populate result data structure
result = EvaluationResult()
Expand All @@ -353,7 +360,8 @@ def spatial_test(gridded_forecast, observed_catalog, num_simulations=1000, seed=
result.min_mw = -1
return result

def likelihood_test(gridded_forecast, observed_catalog, num_simulations=1000, seed=None, random_numbers=None, verbose=False):
def likelihood_test(gridded_forecast, observed_catalog, num_simulations=1000, seed=None, random_numbers=None,
verbose=False):
"""
Performs the likelihood test on Gridded Forecast using an Observed Catalog.

Expand Down Expand Up @@ -383,9 +391,12 @@ def likelihood_test(gridded_forecast, observed_catalog, num_simulations=1000, se

# simply call likelihood test on catalog and forecast
qs, obs_ll, simulated_ll = _poisson_likelihood_test(gridded_forecast.data, gridded_catalog_data,
num_simulations=num_simulations, seed=seed, random_numbers=random_numbers,
num_simulations=num_simulations,
seed=seed,
random_numbers=random_numbers,
use_observed_counts=False,
verbose=verbose)
verbose=verbose,
normalize_likelihood=False)

# populate result data structure
result = EvaluationResult()
Expand Down Expand Up @@ -530,7 +541,6 @@ def _w_test_ndarray(x, m=0):
return w_test_eval

def _simulate_catalog(num_events, sampling_weights, sim_fore, random_numbers=None):

# generate uniformly distributed random numbers in [0,1)
if random_numbers is None:
random_numbers = numpy.random.rand(num_events)
Expand All @@ -550,17 +560,21 @@ def _simulate_catalog(num_events, sampling_weights, sim_fore, random_numbers=Non

return sim_fore

def _poisson_likelihood_test(forecast_data, observed_data, num_simulations=1000, random_numbers=None, seed=None, use_observed_counts=True, verbose=True):
"""
Computes the likelihood-test from CSEP using an efficient simulation based approach.
Args:
forecast_data (numpy.ndarray): nd array where [:, -1] are the magnitude bins.
observed_data (numpy.ndarray): same format as observation.
num_simulations: default number of simulations to use for likelihood based simulations
seed: used for reproducibility of the prng
random_numbers (numpy.ndarray): can supply an explicit list of random numbers, primarily used for software testing
use_observed_counts (bool): if true, will simulate catalogs using the observed events, if false will draw from poisson distribution
def _poisson_likelihood_test(forecast_data, observed_data, num_simulations=1000, random_numbers=None,
seed=None, use_observed_counts=True, verbose=True, normalize_likelihood=False):
"""
Computes the likelihood-test from CSEP using an efficient simulation based approach.
Args:
forecast_data (numpy.ndarray): nd array where [:, -1] are the magnitude bins.
observed_data (numpy.ndarray): same format as observation.
num_simulations: default number of simulations to use for likelihood based simulations
seed: used for reproducibility of the prng
random_numbers (numpy.ndarray): can supply an explicit list of random numbers, primarily used for software testing
use_observed_counts (bool): if true, will simulate catalogs using the observed events, if false will draw from poisson distribution
verbose (bool): if true, write progress of test to command line
normalize_likelihood (bool): if true, normalize the likelihood. Used by default for the magnitude and spatial tests.
"""

# set seed for the likelihood test
if seed is not None:
numpy.random.seed(seed)
Expand All @@ -571,48 +585,47 @@ def _poisson_likelihood_test(forecast_data, observed_data, num_simulations=1000,
# data structures to store results
sim_fore = numpy.zeros(sampling_weights.shape)
simulated_ll = []

# properties of observations and forecasts
n_obs = numpy.sum(observed_data)
n_fore = numpy.sum(forecast_data)

# used for conditional-likelihood, magnitude, and spatial tests to normalize the rate-component of the forecasts.
if use_observed_counts:
expected_forecast_count = numpy.sum(forecast_data)
log_bin_expectations = numpy.log(forecast_data.ravel())
# used for conditional-likelihood, magnitude, and spatial tests to normalize the rate-component of the forecasts
if use_observed_counts and normalize_likelihood:
scale = n_obs / n_fore
expected_forecast_count = int(n_obs)
# Numpy warnings can occur if forecasts have masked cells and thus zero valued rates
with numpy.errstate(divide='ignore'):
log_bin_expectations = numpy.log(forecast_data.ravel() * scale)
else:
expected_forecast_count = numpy.sum(forecast_data)
with numpy.errstate(divide='ignore'):
log_bin_expectations = numpy.log(forecast_data.ravel())
log_bin_expectations = numpy.log(forecast_data.ravel() * scale)

# gets the 1d indices to bins that contain target events, these indexes perform copies and not views into the array
target_idx = numpy.nonzero(observed_data.ravel())

# these operations perform copies
# note for performance: these operations perform copies
observed_data_nonzero = observed_data.ravel()[target_idx]
target_event_forecast = log_bin_expectations[target_idx] * observed_data_nonzero

# main simulation step in this loop
for idx in range(num_simulations):
if use_observed_counts:
num_events_to_simulate = expected_forecast_count
num_events_to_simulate = int(n_obs)
else:
num_events_to_simulate = int(numpy.random.poisson(expected_forecast_count))

if random_numbers is None:
sim_fore = _simulate_catalog(num_events_to_simulate, sampling_weights, sim_fore)
else:
sim_fore = _simulate_catalog(num_events_to_simulate, sampling_weights, sim_fore,
random_numbers=random_numbers[idx,:])
random_numbers=random_numbers[idx, :])

# compute joint log-likelihood from simulation by leveraging that only cells with target events contribute to likelihood
sim_target_idx = numpy.nonzero(sim_fore)
sim_obs_nonzero = sim_fore[sim_target_idx]
sim_target_event_forecast = log_bin_expectations[sim_target_idx] * sim_obs_nonzero

# compute joint log-likelihood
current_ll = poisson_joint_log_likelihood_ndarray(sim_target_event_forecast, sim_obs_nonzero, expected_forecast_count)
current_ll = poisson_joint_log_likelihood_ndarray(sim_target_event_forecast, sim_obs_nonzero,
expected_forecast_count)

# append to list of simulated log-likelihoods
simulated_ll.append(current_ll)
Expand All @@ -630,9 +643,3 @@ def _poisson_likelihood_test(forecast_data, observed_data, num_simulations=1000,

# float, float, list
return qs, obs_ll, simulated_ll

"""
Created on Thu Jan 23 20:06:58 2020

@author: khawaja and wsavran
"""