Skip to content

Commit 4477580

Browse files
automatic Hs threshold
1 parent 2ccc286 commit 4477580

File tree

3 files changed

+126
-0
lines changed

3 files changed

+126
-0
lines changed

examples/data/loads/data_loads_hs.csv

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

mhkit/loads/extreme.py

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import pandas as pd
33
from scipy import stats
44
from scipy import optimize
5+
from scipy import signal
56
from mhkit.wave.resource import frequency_moment
67

78

@@ -157,6 +158,124 @@ def peaks_distribution_weibull_tail_fit(x):
157158
return peaks
158159

159160

161+
def automatic_hs_threshold(
    peaks,
    sampling_rate,
    initial_threshold_range=(0.990, 0.999, 0.001),
    max_refinement=5,
):
    """
    Find the best significant wave height threshold for the
    peaks-over-threshold method.

    This method was developed by:

    > Neary, V. S., S. Ahn, B. E. Seng, M. N. Allahdadi, T. Wang, Z. Yang
    > and R. He (2020). "Characterization of Extreme Wave Conditions for
    > Wave Energy Converter Design and Project Risk Assessment."
    > J. Mar. Sci. Eng. 2020, 8(4), 289;
    > https://doi.org/10.3390/jmse8040289.

    Please cite this paper if using this method.

    After all thresholds in the initial range are evaluated, the search
    range is refined around the optimal point until either (i) there
    is minimal change from the previous refinement results, (ii) the
    number of data points become smaller than about 1 per year, or (iii)
    the maximum number of iterations is reached.

    Parameters
    ----------
    peaks: np.array
        Peak values of the response time-series.
    sampling_rate: float
        Sampling rate in hours.
    initial_threshold_range: tuple
        Initial range of thresholds to search. Described as
        (min, max, step), in percentile fractions (e.g. 0.99 = 99th).
    max_refinement: int
        Maximum number of times to refine the search range.

    Returns
    -------
    best_threshold: float
        Threshold (percentile fraction) that results in the best
        correlation.
    """
    assert isinstance(sampling_rate, (float, int)), (
        'sampling_rate must be of type float')
    assert isinstance(peaks, np.ndarray), 'peaks must be of type np.ndarray'
    assert len(initial_threshold_range) == 3, (
        'initial_threshold_range must be length 3')
    assert isinstance(max_refinement, int)

    range_min, range_max, range_step = initial_threshold_range
    best_threshold = -1
    # Record length in years, assuming evenly spaced observations.
    years = len(peaks) / (365.25 * 24 / sampling_rate)

    def _peaks_over_threshold(peaks, threshold, sampling_rate):
        # De-clustered exceedances of the given percentile threshold.
        threshold_unit = stats.scoreatpercentile(peaks, 100 * threshold)
        idx_peaks = np.arange(len(peaks))
        idx_storm_peaks, storm_peaks = global_peaks(
            idx_peaks, peaks - threshold_unit)
        idx_storm_peaks = idx_storm_peaks.astype(int)

        # Two storms that are close enough (within specified window) are
        # considered the same storm, to ensure independence.
        independent_storm_peaks = [storm_peaks[0], ]
        idx_independent_storm_peaks = [idx_storm_peaks[0], ]
        # Check first 14 days of lags to determine the window size: the
        # first positive lag where the normalized autocorrelation drops
        # below 0.5.
        nlags = int(14 * 24 / sampling_rate)
        x = peaks - np.mean(peaks)
        acf = signal.correlate(x, x, mode="full")
        lag = signal.correlation_lags(len(x), len(x), mode="full")
        idx_zero = np.argmax(lag == 0)
        positive_lag = lag[(idx_zero + 1):(idx_zero + nlags)]
        acf_positive = acf[(idx_zero + 1):(idx_zero + nlags)] / acf[idx_zero]
        window_size = sampling_rate * positive_lag[np.argmax(acf_positive < 0.5)]
        # window size in "observations" instead of "hours" between peaks.
        window = window_size / sampling_rate
        # keep only independent storm peaks
        for idx in idx_storm_peaks[1:]:
            if (idx - idx_independent_storm_peaks[-1]) > window:
                idx_independent_storm_peaks.append(idx)
                independent_storm_peaks.append(peaks[idx])

        return independent_storm_peaks

    for i in range(max_refinement):
        thresholds = np.arange(range_min, range_max, range_step)
        correlations = []

        for threshold in thresholds:
            distribution = stats.genpareto
            over_threshold = _peaks_over_threshold(
                peaks, threshold, sampling_rate)
            rate_per_year = len(over_threshold) / years
            # Stopping criterion (ii): exceedances too rare to fit.
            if rate_per_year < 2:
                break
            distributions_parameters = distribution.fit(
                over_threshold, floc=0.)
            # BUGFIX: evaluate goodness-of-fit on the data the
            # distribution was fitted to (the de-clustered exceedances),
            # not on the full peaks record.
            _, (_, _, correlation) = stats.probplot(
                over_threshold, distributions_parameters, distribution,
                fit=True)
            correlations.append(correlation)

        # ROBUSTNESS: if even the first threshold yielded too few
        # exceedances, correlations is empty and argmax would raise;
        # keep the best threshold found so far instead.
        if not correlations:
            break

        max_i = np.argmax(correlations)
        # Stopping criterion (i): negligible movement of the optimum.
        minimal_change = np.abs(best_threshold - thresholds[max_i]) < 0.0005
        best_threshold = thresholds[max_i]
        if minimal_change and i < max_refinement - 1:
            break
        # Refine: shrink the step and re-center the search window around
        # the current optimum, widening outward at the range edges.
        range_step /= 10
        if max_i == len(thresholds) - 1:
            range_min = thresholds[max_i - 1]
            range_max = thresholds[max_i] + 5 * range_step
        elif max_i == 0:
            range_min = thresholds[max_i] - 9 * range_step
            range_max = thresholds[max_i + 1]
        else:
            range_min = thresholds[max_i - 1]
            range_max = thresholds[max_i + 1]

    return best_threshold
277+
278+
160279
def peaks_distribution_peaks_over_threshold(x, threshold=None):
161280
"""
162281
Estimate the peaks distribution using the peaks over threshold

mhkit/tests/loads/test_loads.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,12 @@ def test_shortterm_extreme(self):
218218
ste = loads.extreme.ste(t, data, t_st, method)
219219
assert_allclose(ste.cdf(x), cdf_1)
220220

221+
def test_automatic_threshold(self):
    """Regression test for the automatic Hs threshold selection added
    in this commit, against a known dataset and expected percentile."""
    filename = "data_loads_hs.csv"
    data = np.loadtxt(os.path.join(datadir, filename), delimiter=",")
    years = 2.97
    # BUGFIX: the function added by this commit is named
    # automatic_hs_threshold; automatic_peaks_threshold does not exist
    # and the test raised AttributeError as written.
    # NOTE(review): the second positional argument of
    # automatic_hs_threshold is sampling_rate (hours) — confirm that
    # 2.97 is the intended sampling rate for this dataset and not a
    # record length in years.
    threshold = loads.extreme.automatic_hs_threshold(data, years)
    assert np.isclose(threshold, 0.99724)
221227

222228
if __name__ == '__main__':
223229
unittest.main()

0 commit comments

Comments
 (0)