@@ -2,6 +2,7 @@
 import pandas as pd
 from scipy import stats
 from scipy import optimize
+from scipy import signal
 from mhkit.wave.resource import frequency_moment
 
 
@@ -157,6 +158,124 @@ def peaks_distribution_weibull_tail_fit(x):
     return peaks
 
 
+def automatic_hs_threshold(
+    peaks,
+    sampling_rate,
+    initial_threshold_range=(0.990, 0.999, 0.001),
+    max_refinement=5
+):
+    """
+    Find the best significant wave height threshold for the
+    peaks-over-threshold method.
+
+    This method was developed by:
+
+    > Neary, V. S., S. Ahn, B. E. Seng, M. N. Allahdadi, T. Wang, Z. Yang and R. He (2020).
+    > "Characterization of Extreme Wave Conditions for Wave Energy Converter Design and Project Risk Assessment."
+    > J. Mar. Sci. Eng. 2020, 8(4), 289; https://doi.org/10.3390/jmse8040289.
+
+    Please cite this paper if using this method.
+
+    After all thresholds in the initial range are evaluated, the search
+    range is refined around the optimal point until either (i) there is
+    minimal change from the previous refinement results, (ii) the number
+    of data points becomes smaller than about 1 per year, or (iii) the
+    maximum number of iterations is reached.
+
+    Parameters
+    ----------
+    peaks: np.array
+        Peak values of the response time series.
+    sampling_rate: float
+        Sampling rate in hours.
+    initial_threshold_range: tuple
+        Initial range of thresholds to search, described as
+        (min, max, step).
+    max_refinement: int
+        Maximum number of times to refine the search range.
+
+    Returns
+    -------
+    best_threshold: float
+        Threshold that results in the best correlation.
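+
+    Examples
+    --------
+    An illustrative call on synthetic hourly peaks; the array below is
+    a placeholder, not data from the reference above:
+
+    >>> import numpy as np
+    >>> rng = np.random.default_rng(0)
+    >>> hs_peaks = rng.weibull(1.5, 50000)  # ~5.7 years of hourly peaks
+    >>> threshold = automatic_hs_threshold(hs_peaks, sampling_rate=1.0)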
+    """
+    assert isinstance(sampling_rate, (float, int)), (
+        'sampling_rate must be of type float or int')
+    assert isinstance(peaks, np.ndarray), 'peaks must be of type np.ndarray'
+    assert len(initial_threshold_range) == 3, (
+        'initial_threshold_range must be length 3')
+    assert isinstance(max_refinement, int), (
+        'max_refinement must be of type int')
+
+    range_min, range_max, range_step = initial_threshold_range
+    best_threshold = -1
+    years = len(peaks) / (365.25 * 24 / sampling_rate)
+
+    def _peaks_over_threshold(peaks, threshold, sampling_rate):
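+        # `threshold` is a percentile fraction (e.g. 0.995); convert it
+        # to a value in the units of the peaks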
+        threshold_unit = stats.scoreatpercentile(peaks, 100 * threshold)
+        idx_peaks = np.arange(len(peaks))
+        idx_storm_peaks, storm_peaks = global_peaks(
+            idx_peaks, peaks - threshold_unit)
+        idx_storm_peaks = idx_storm_peaks.astype(int)
+
+        # Two storms that are close enough (within the specified window)
+        # are considered the same storm, to ensure independence.
+        independent_storm_peaks = [storm_peaks[0]]
+        idx_independent_storm_peaks = [idx_storm_peaks[0]]
+        # check the first 14 days to determine the window size
+        nlags = int(14 * 24 / sampling_rate)
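+        # the declustering window is the first lag at which the
+        # normalized autocorrelation of the peaks drops below 0.5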
+        x = peaks - np.mean(peaks)
+        acf = signal.correlate(x, x, mode="full")
+        lag = signal.correlation_lags(len(x), len(x), mode="full")
+        idx_zero = np.argmax(lag == 0)
+        positive_lag = lag[(idx_zero + 1):(idx_zero + nlags)]
+        acf_positive = acf[(idx_zero + 1):(idx_zero + nlags)] / acf[idx_zero]
+        window_size = sampling_rate * positive_lag[np.argmax(acf_positive < 0.5)]
+        # convert the window size from hours to a number of observations
+        # between peaks
+        window = window_size / sampling_rate
+        # keep only independent storm peaks, stored as exceedances above
+        # the threshold (consistent with the first entry and `floc=0.`)
+        for idx in idx_storm_peaks[1:]:
+            if (idx - idx_independent_storm_peaks[-1]) > window:
+                idx_independent_storm_peaks.append(idx)
+                independent_storm_peaks.append(peaks[idx] - threshold_unit)
+
+        return independent_storm_peaks
+
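+    # Evaluate every threshold in the current search range, then narrow
+    # the range around the best-performing threshold and repeat.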
+    for i in range(max_refinement):
+        thresholds = np.arange(range_min, range_max, range_step)
+        correlations = []
+
+        for threshold in thresholds:
+            distribution = stats.genpareto
+            over_threshold = _peaks_over_threshold(
+                peaks, threshold, sampling_rate)
+            rate_per_year = len(over_threshold) / years
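+            # stopping criterion (ii): stop raising the threshold once
+            # fewer than ~2 independent storms per year remain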
+            if rate_per_year < 2:
+                break
+            distributions_parameters = distribution.fit(
+                over_threshold, floc=0.)
+            _, (_, _, correlation) = stats.probplot(
+                over_threshold, distributions_parameters, distribution,
+                fit=True)
+            correlations.append(correlation)
+
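+        # stopping criterion (i): the optimum moved by less than 0.0005
+        # relative to the previous refinement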
+        max_i = np.argmax(correlations)
+        minimal_change = np.abs(best_threshold - thresholds[max_i]) < 0.0005
+        best_threshold = thresholds[max_i]
+        if minimal_change and i < max_refinement - 1:
+            break
+        range_step /= 10
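+        # re-center the next, finer search range on the best threshold,
+        # extending past the edge if the optimum fell on a boundary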
+        if max_i == len(thresholds) - 1:
+            range_min = thresholds[max_i - 1]
+            range_max = thresholds[max_i] + 5 * range_step
+        elif max_i == 0:
+            range_min = thresholds[max_i] - 9 * range_step
+            range_max = thresholds[max_i + 1]
+        else:
+            range_min = thresholds[max_i - 1]
+            range_max = thresholds[max_i + 1]
+
+    return best_threshold
+
+
 def peaks_distribution_peaks_over_threshold(x, threshold=None):
     """
     Estimate the peaks distribution using the peaks over threshold