Fix docstring inconsistencies in benchmarking module (resolves #809) (#2735)

adityagh006 · web-flow · commit 05fb64c2109f · 2025-04-11T19:39:19.000+01:00
* issue#809 Fix docstrings for benchmarking functions
* Fixed docstrings in results_loaders.py
* Fix docstring inconsistencies in benchmarking module - resolves #809
* Fix docstring inconsistencies in benchmarking module - resolves #809
diff --git a/aeon/benchmarking/metrics/anomaly_detection/thresholding.py b/aeon/benchmarking/metrics/anomaly_detection/thresholding.py
@@ -38,8 +38,8 @@ def percentile_threshold(y_score: np.ndarray, percentile: int) -> float:
 def sigma_threshold(y_score: np.ndarray, factor: float = 2) -> float:
     r"""Calculate a threshold based on the standard deviation of the anomaly scores.
 
-    Computes a threshold :math:`\theta` based on the anomaly scoring's mean
-    :math:`\mu_s` and the standard deviation :math:`\sigma_s`, ignoring NaNs:
+    Computes a threshold :math:`\theta` based on the anomaly scoring's mean
+    :math:`\mu_s` and the standard deviation :math:`\sigma_s`, ignoring NaNs:
 
     .. math::
        \theta = \mu_{s} + x \cdot \sigma_{s}
@@ -49,7 +49,7 @@ def sigma_threshold(y_score: np.ndarray, factor: float = 2) -> float:
     y_score : np.ndarray
         Anomaly scores for each point of the time series of shape (n_instances,).
     factor : float
-        Number of standard deviations to use as threshold (:math:`x`).
+        Number of standard deviations to use as threshold (:math:`x`).
 
     Returns
     -------
@@ -62,14 +62,15 @@ def sigma_threshold(y_score: np.ndarray, factor: float = 2) -> float:
 def top_k_points_threshold(
     y_true: np.ndarray, y_score: np.ndarray, k: int | None = None
 ) -> float:
-    """Calculate a threshold such that at least `k` anomalous points are found.
+    """Calculate a threshold such that at least ``k`` anomalous points are found.
 
     The anomalies are single-point anomalies.
 
     Computes a threshold based on the number of expected anomalies (number of
     anomalies). This method iterates over all possible thresholds from high to low to
-    find the first threshold that yields `k` or more anomalous points. If `k` is `None`,
-    the ground truth data is used to calculate the real number of anomalies.
+    find the first threshold that yields ``k`` or more anomalous points. If ``k``
+    is ``None``, the ground truth data is used to calculate the real number of
+    anomalies.
 
     Parameters
     ----------
@@ -78,13 +79,13 @@ def top_k_points_threshold(
     y_score : np.ndarray
         Anomaly scores for each point of the time series of shape (n_instances,).
     k : optional int
-        Number of expected anomalies. If `k` is `None`, the ground truth data is used
-        to calculate the real number of anomalies.
+        Number of expected anomalies. If ``k`` is ``None``, the ground truth data
+        is used to calculate the real number of anomalies.
 
     Returns
     -------
     float
-        Threshold such that there are at least `k` anomalous points.
+        Threshold such that there are at least ``k`` anomalous points.
     """
     if k is None:
         return np.nanpercentile(y_score, (1 - y_true.sum() / y_true.shape[0]) * 100)
@@ -95,15 +96,15 @@ def top_k_points_threshold(
 def top_k_ranges_threshold(
     y_true: np.ndarray, y_score: np.ndarray, k: int | None = None
 ) -> float:
-    """Calculate a threshold such that at least `k` anomalies are found.
+    """Calculate a threshold such that at least ``k`` anomalies are found.
 
     The anomalies are either single-points anomalies or continuous anomalous ranges.
 
     Computes a threshold based on the number of expected anomalous subsequences /
     ranges (number of anomalies). This method iterates over all possible thresholds
     from high to low to find the first threshold that yields `k` or more continuous
-    anomalous ranges. If `k` is `None`, the ground truth data is used to calculate the
-    real number of anomalies (anomalous ranges).
+    anomalous ranges. If ``k`` is ``None``, the ground truth data is used to
+    calculate the real number of anomalies (anomalous ranges).
 
     Parameters
     ----------
@@ -112,13 +113,13 @@ def top_k_ranges_threshold(
     y_score : np.ndarray
         Anomaly scores for each point of the time series of shape (n_instances,).
     k : optional int
-        Number of expected anomalies. If `k` is `None`, the ground truth data is used
-        to calculate the real number of anomalies.
+        Number of expected anomalies. If ``k`` is ``None``, the ground truth data
+        is used to calculate the real number of anomalies.
 
     Returns
     -------
     float
-        Threshold such that there are at least `k` anomalous ranges.
+        Threshold such that there are at least ``k`` anomalous ranges.
     """
     if k is None:
         k = _count_anomaly_ranges(y_true)
diff --git a/aeon/benchmarking/metrics/segmentation.py b/aeon/benchmarking/metrics/segmentation.py
@@ -47,7 +47,7 @@ def hausdorff_error(
 
     .. seealso::
 
-       This function wraps :py:func:`scipy.spatial.distance.directed_hausdorff`
+       This function wraps :py:func:`scipy.spatial.distance.directed_hausdorff`
 
     Parameters
     ----------
@@ -56,7 +56,7 @@ def hausdorff_error(
     pred_change_points: array_like
         Integer indexes (positions) of predicted change points
     symmetric: bool
-        If `True` symmetric Hausdorff distance will be used
+        If ``True`` symmetric Hausdorff distance will be used
     seed: int, default=0
         Local numpy.random.RandomState seed. Default is 0, a random
         shuffling of u and v that guarantees reproducibility.
diff --git a/aeon/benchmarking/resampling.py b/aeon/benchmarking/resampling.py
@@ -32,10 +32,10 @@ def resample_data(X_train, y_train, X_test, y_test, random_state=None):
     y_test : np.ndarray
         Test data labels.
     random_state : int, RandomState instance or None, default=None
-        If `int`, random_state is the seed used by the random number generator;
-        If `RandomState` instance, random_state is the random number generator;
-        If `None`, the random number generator is the `RandomState` instance used
-        by `np.random`.
+        If ``int``, random_state is the seed used by the random number generator;
+        If ``RandomState`` instance, random_state is the random number generator;
+        If ``None``, the random number generator is the ``RandomState`` instance
+        used by ``np.random``.
 
     Returns
     -------
@@ -93,10 +93,10 @@ def resample_data_indices(y_train, y_test, random_state=None):
     y_test : np.ndarray
         Test data labels.
     random_state : int, RandomState instance or None, default=None
-        If `int`, random_state is the seed used by the random number generator;
-        If `RandomState` instance, random_state is the random number generator;
-        If `None`, the random number generator is the `RandomState` instance used
-        by `np.random`.
+        If ``int``, random_state is the seed used by the random number generator;
+        If ``RandomState`` instance, random_state is the random number generator;
+        If ``None``, the random number generator is the ``RandomState`` instance
+        used by ``np.random``.
 
     Returns
     -------
@@ -136,10 +136,10 @@ def stratified_resample_data(X_train, y_train, X_test, y_test, random_state=None
     y_test : np.ndarray
         Test data labels.
     random_state : int, RandomState instance or None, default=None
-        If `int`, random_state is the seed used by the random number generator;
-        If `RandomState` instance, random_state is the random number generator;
-        If `None`, the random number generator is the `RandomState` instance used
-        by `np.random`.
+        If ``int``, random_state is the seed used by the random number generator;
+        If ``RandomState`` instance, random_state is the random number generator;
+        If ``None``, the random number generator is the ``RandomState`` instance
+        used by ``np.random``.
 
     Returns
     -------
@@ -200,10 +200,10 @@ def stratified_resample_data_indices(y_train, y_test, random_state=None):
     y_test : np.ndarray
         Test data labels.
     random_state : int, RandomState instance or None, default=None
-        If `int`, random_state is the seed used by the random number generator;
-        If `RandomState` instance, random_state is the random number generator;
-        If `None`, the random number generator is the `RandomState` instance used
-        by `np.random`.
+        If ``int``, random_state is the seed used by the random number generator;
+        If ``RandomState`` instance, random_state is the random number generator;
+        If ``None``, the random number generator is the ``RandomState`` instance
+        used by ``np.random``.
 
     Returns
     -------