Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -290,9 +290,15 @@ The currently implemented detectors are listed in the following table.
<td style="text-align: center; border: 1px solid grey; padding: 8px;"><a href="https://doi.org/10.1007/978-3-540-75488-6_27">Nishida and Yamauchi (2007)</a></td>
</tr>
<tr>
<td rowspan="15" style="text-align: center; border: 1px solid grey; padding: 8px;">Data drift</td>
<td rowspan="13" style="text-align: center; border: 1px solid grey; padding: 8px;">Batch</td>
<td rowspan="8" style="text-align: center; border: 1px solid grey; padding: 8px;">Distance based</td>
<td rowspan="16" style="text-align: center; border: 1px solid grey; padding: 8px;">Data drift</td>
<td rowspan="14" style="text-align: center; border: 1px solid grey; padding: 8px;">Batch</td>
<td rowspan="9" style="text-align: center; border: 1px solid grey; padding: 8px;">Distance based</td>
<td style="text-align: center; border: 1px solid grey; padding: 8px;">U</td>
<td style="text-align: center; border: 1px solid grey; padding: 8px;">N</td>
<td style="text-align: center; border: 1px solid grey; padding: 8px;">Anderson-Darling test</td>
<td style="text-align: center; border: 1px solid grey; padding: 8px;"><a href="https://doi.org/10.2307/2288805">Scholz and Stephens (1987)</a></td>
</tr>
<tr>
<td style="text-align: center; border: 1px solid grey; padding: 8px;">U</td>
<td style="text-align: center; border: 1px solid grey; padding: 8px;">N</td>
<td style="text-align: center; border: 1px solid grey; padding: 8px;">Bhattacharyya distance</td>
Expand Down
1 change: 1 addition & 0 deletions docs/source/api_reference/detectors/data_drift/batch.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ The {mod}`frouros.detectors.data_drift.batch` module contains batch data drift d
:toctree: auto_generated/
:template: class.md

AndersonDarlingTest
ChiSquareTest
CVMTest
KSTest
Expand Down
2 changes: 2 additions & 0 deletions frouros/detectors/data_drift/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Data drift detection methods init."""

from .batch import ( # noqa: F401
AndersonDarlingTest,
BhattacharyyaDistance,
ChiSquareTest,
CVMTest,
Expand All @@ -19,6 +20,7 @@
from .streaming import IncrementalKSTest, MMD as MMDStreaming # noqa: N811

__all__ = [
"AndersonDarlingTest",
"BhattacharyyaDistance",
"ChiSquareTest",
"CVMTest",
Expand Down
2 changes: 2 additions & 0 deletions frouros/detectors/data_drift/batch/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
MMD,
)
from .statistical_test import (
AndersonDarlingTest,
ChiSquareTest,
CVMTest,
KSTest,
Expand All @@ -19,6 +20,7 @@
)

__all__ = [
"AndersonDarlingTest",
"BhattacharyyaDistance",
"ChiSquareTest",
"CVMTest",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
"""Data drift batch statistical test detection methods' init."""

from .anderson_darling import AndersonDarlingTest
from .chisquare import ChiSquareTest
from .cvm import CVMTest
from .ks import KSTest
from .mann_whitney_u import MannWhitneyUTest
from .welch_t_test import WelchTTest

__all__ = [
"AndersonDarlingTest",
"ChiSquareTest",
"CVMTest",
"KSTest",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
"""Anderson-Darling test module."""

from typing import Optional, List, Union

import numpy as np # type: ignore
from scipy.stats import anderson_ksamp # type: ignore

from frouros.callbacks.batch.base import BaseCallbackBatch
from frouros.detectors.data_drift.base import NumericalData, UnivariateData
from frouros.detectors.data_drift.batch.statistical_test.base import (
BaseStatisticalTest,
StatisticalResult,
)


class AndersonDarlingTest(BaseStatisticalTest):
    """Anderson-Darling test [scholz1987k]_ detector.

    Hands the reference sample and the sample under test to
    :func:`scipy.stats.anderson_ksamp` and reports the resulting statistic
    and p-value as a ``StatisticalResult``.

    :Note:
        According to the scipy documentation [1]_, p-values are bounded
        between 0.001 and 0.25.

    :References:

    .. [scholz1987k] Scholz, Fritz W., and Michael A. Stephens.
        "K-sample Anderson–Darling tests."
        Journal of the American Statistical Association 82.399 (1987): 918-924.
    .. [1] https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.anderson_ksamp.html  # noqa: E501  # pylint: disable=line-too-long
    """

    def __init__(
        self,
        callbacks: Optional[Union[BaseCallbackBatch, List[BaseCallbackBatch]]] = None,
    ) -> None:
        """Init method.

        The detector is registered as operating on numerical, univariate data.

        :param callbacks: callbacks forwarded unchanged to the base class
        :type callbacks: Optional[Union[BaseCallbackBatch, List[BaseCallbackBatch]]]
        """
        super().__init__(
            data_type=NumericalData(),
            statistical_type=UnivariateData(),
            callbacks=callbacks,
        )

    def _statistical_test(
        self, X_ref: np.ndarray, X: np.ndarray, **kwargs  # noqa: N803
    ) -> StatisticalResult:
        """Run the k-sample Anderson-Darling test on the two samples.

        :param X_ref: reference data
        :type X_ref: numpy.ndarray
        :param X: data to compare against the reference
        :type X: numpy.ndarray
        :param kwargs: extra keyword arguments passed straight through to
            ``scipy.stats.anderson_ksamp``
        :return: statistic and p-value reported by scipy
        :rtype: StatisticalResult
        """
        # Sample order is kept as (reference, test), exactly as handed in.
        ad_outcome = anderson_ksamp(samples=[X_ref, X], **kwargs)
        return StatisticalResult(
            statistic=ad_outcome.statistic,
            p_value=ad_outcome.pvalue,
        )
2 changes: 2 additions & 0 deletions frouros/tests/integration/test_data_drift.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
MMD,
)
from frouros.detectors.data_drift.batch import (
AndersonDarlingTest,
ChiSquareTest,
CVMTest,
KSTest,
Expand Down Expand Up @@ -160,6 +161,7 @@ def test_batch_distance_bins_based_univariate_same_distribution(
@pytest.mark.parametrize(
"detector, expected_statistic, expected_p_value",
[
(AndersonDarlingTest(), 23171.19994366, 0.001),
(CVMTest(), 3776.09848103, 5.38105056e-07),
(KSTest(), 0.99576271, 0.0),
(MannWhitneyUTest(), 6912.0, 0.0),
Expand Down