Skip to content

Commit 05fb64c

Browse files
authored
Fix docstring inconsistencies in benchmarking module (resolves #809) (#2735)
* issue#809 Fix docstrings for benchmarking functions * Fixed docstrings in results_loaders.py * Fix docstring inconsistencies in benchmarking module - resolves #809 * Fix docstring inconsistencies in benchmarking module - resolves #809
1 parent 303dbae commit 05fb64c

File tree

3 files changed

+34
-33
lines changed

3 files changed

+34
-33
lines changed

aeon/benchmarking/metrics/anomaly_detection/thresholding.py

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ def percentile_threshold(y_score: np.ndarray, percentile: int) -> float:
3838
def sigma_threshold(y_score: np.ndarray, factor: float = 2) -> float:
3939
r"""Calculate a threshold based on the standard deviation of the anomaly scores.
4040
41-
Computes a threshold :math:`\theta` based on the anomaly scoring's mean
42-
:math:`\mu_s` and the standard deviation :math:`\sigma_s`, ignoring NaNs:
41+
Computes a threshold :math:`\theta` based on the anomaly scoring's mean
42+
:math:`\mu_s` and the standard deviation :math:`\sigma_s`, ignoring NaNs:
4343
4444
.. math::
4545
\theta = \mu_{s} + x \cdot \sigma_{s}
@@ -49,7 +49,7 @@ def sigma_threshold(y_score: np.ndarray, factor: float = 2) -> float:
4949
y_score : np.ndarray
5050
Anomaly scores for each point of the time series of shape (n_instances,).
5151
factor : float
52-
Number of standard deviations to use as threshold (:math:`x`).
52+
Number of standard deviations to use as threshold (:math:`x`).
5353
5454
Returns
5555
-------
@@ -62,14 +62,15 @@ def sigma_threshold(y_score: np.ndarray, factor: float = 2) -> float:
6262
def top_k_points_threshold(
6363
y_true: np.ndarray, y_score: np.ndarray, k: int | None = None
6464
) -> float:
65-
"""Calculate a threshold such that at least `k` anomalous points are found.
65+
"""Calculate a threshold such that at least ``k`` anomalous points are found.
6666
6767
The anomalies are single-point anomalies.
6868
6969
Computes a threshold based on the number of expected anomalies (number of
7070
anomalies). This method iterates over all possible thresholds from high to low to
71-
find the first threshold that yields `k` or more anomalous points. If `k` is `None`,
72-
the ground truth data is used to calculate the real number of anomalies.
71+
find the first threshold that yields ``k`` or more anomalous points. If ``k``
72+
is ``None``, the ground truth data is used to calculate the real number of
73+
anomalies.
7374
7475
Parameters
7576
----------
@@ -78,13 +79,13 @@ def top_k_points_threshold(
7879
y_score : np.ndarray
7980
Anomaly scores for each point of the time series of shape (n_instances,).
8081
k : int or None, default=None
81-
Number of expected anomalies. If `k` is `None`, the ground truth data is used
82-
to calculate the real number of anomalies.
82+
Number of expected anomalies. If ``k`` is ``None``, the ground truth data
83+
is used to calculate the real number of anomalies.
8384
8485
Returns
8586
-------
8687
float
87-
Threshold such that there are at least `k` anomalous points.
88+
Threshold such that there are at least ``k`` anomalous points.
8889
"""
8990
if k is None:
9091
return np.nanpercentile(y_score, (1 - y_true.sum() / y_true.shape[0]) * 100)
@@ -95,15 +96,15 @@ def top_k_points_threshold(
9596
def top_k_ranges_threshold(
9697
y_true: np.ndarray, y_score: np.ndarray, k: int | None = None
9798
) -> float:
98-
"""Calculate a threshold such that at least `k` anomalies are found.
99+
"""Calculate a threshold such that at least ``k`` anomalies are found.
99100
100101
The anomalies are either single-point anomalies or continuous anomalous ranges.
101102
102103
Computes a threshold based on the number of expected anomalous subsequences /
103104
ranges (number of anomalies). This method iterates over all possible thresholds
104105
from high to low to find the first threshold that yields ``k`` or more continuous
105-
anomalous ranges. If `k` is `None`, the ground truth data is used to calculate the
106-
real number of anomalies (anomalous ranges).
106+
anomalous ranges. If ``k`` is ``None``, the ground truth data is used to
107+
calculate the real number of anomalies (anomalous ranges).
107108
108109
Parameters
109110
----------
@@ -112,13 +113,13 @@ def top_k_ranges_threshold(
112113
y_score : np.ndarray
113114
Anomaly scores for each point of the time series of shape (n_instances,).
114115
k : int or None, default=None
115-
Number of expected anomalies. If `k` is `None`, the ground truth data is used
116-
to calculate the real number of anomalies.
116+
Number of expected anomalies. If ``k`` is ``None``, the ground truth data
117+
is used to calculate the real number of anomalies.
117118
118119
Returns
119120
-------
120121
float
121-
Threshold such that there are at least `k` anomalous ranges.
122+
Threshold such that there are at least ``k`` anomalous ranges.
122123
"""
123124
if k is None:
124125
k = _count_anomaly_ranges(y_true)

aeon/benchmarking/metrics/segmentation.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def hausdorff_error(
4747
4848
.. seealso::
4949
50-
This function wraps :py:func:`scipy.spatial.distance.directed_hausdorff`
50+
This function wraps :py:func:`scipy.spatial.distance.directed_hausdorff`
5151
5252
Parameters
5353
----------
@@ -56,7 +56,7 @@ def hausdorff_error(
5656
pred_change_points: array_like
5757
Integer indexes (positions) of predicted change points
5858
symmetric: bool
59-
If `True` symmetric Hausdorff distance will be used
59+
If ``True`` symmetric Hausdorff distance will be used
6060
seed: int, default=0
6161
Local numpy.random.RandomState seed. Default is 0, a random
6262
shuffling of u and v that guarantees reproducibility.

aeon/benchmarking/resampling.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,10 @@ def resample_data(X_train, y_train, X_test, y_test, random_state=None):
3232
y_test : np.ndarray
3333
Test data labels.
3434
random_state : int, RandomState instance or None, default=None
35-
If `int`, random_state is the seed used by the random number generator;
36-
If `RandomState` instance, random_state is the random number generator;
37-
If `None`, the random number generator is the `RandomState` instance used
38-
by `np.random`.
35+
If ``int``, random_state is the seed used by the random number generator;
36+
If ``RandomState`` instance, random_state is the random number generator;
37+
If ``None``, the random number generator is the ``RandomState`` instance
38+
used by ``np.random``.
3939
4040
Returns
4141
-------
@@ -93,10 +93,10 @@ def resample_data_indices(y_train, y_test, random_state=None):
9393
y_test : np.ndarray
9494
Test data labels.
9595
random_state : int, RandomState instance or None, default=None
96-
If `int`, random_state is the seed used by the random number generator;
97-
If `RandomState` instance, random_state is the random number generator;
98-
If `None`, the random number generator is the `RandomState` instance used
99-
by `np.random`.
96+
If ``int``, random_state is the seed used by the random number generator;
97+
If ``RandomState`` instance, random_state is the random number generator;
98+
If ``None``, the random number generator is the ``RandomState`` instance
99+
used by ``np.random``.
100100
101101
Returns
102102
-------
@@ -136,10 +136,10 @@ def stratified_resample_data(X_train, y_train, X_test, y_test, random_state=None
136136
y_test : np.ndarray
137137
Test data labels.
138138
random_state : int, RandomState instance or None, default=None
139-
If `int`, random_state is the seed used by the random number generator;
140-
If `RandomState` instance, random_state is the random number generator;
141-
If `None`, the random number generator is the `RandomState` instance used
142-
by `np.random`.
139+
If ``int``, random_state is the seed used by the random number generator;
140+
If ``RandomState`` instance, random_state is the random number generator;
141+
If ``None``, the random number generator is the ``RandomState`` instance
142+
used by ``np.random``.
143143
144144
Returns
145145
-------
@@ -200,10 +200,10 @@ def stratified_resample_data_indices(y_train, y_test, random_state=None):
200200
y_test : np.ndarray
201201
Test data labels.
202202
random_state : int, RandomState instance or None, default=None
203-
If `int`, random_state is the seed used by the random number generator;
204-
If `RandomState` instance, random_state is the random number generator;
205-
If `None`, the random number generator is the `RandomState` instance used
206-
by `np.random`.
203+
If ``int``, random_state is the seed used by the random number generator;
204+
If ``RandomState`` instance, random_state is the random number generator;
205+
If ``None``, the random number generator is the ``RandomState`` instance
206+
used by ``np.random``.
207207
208208
Returns
209209
-------

0 commit comments

Comments
 (0)