|
8 | 8 | from frouros.data_drift.batch.base import DataDriftBatchBase
|
9 | 9 | from frouros.data_drift.batch.distance_based import (
|
10 | 10 | EMD,
|
| 11 | + Hellinger, |
11 | 12 | HistogramIntersection,
|
12 | 13 | PSI,
|
13 | 14 | JS,
|
@@ -56,7 +57,6 @@ def test_batch_distance_based_categorical(
|
56 | 57 | "detector, expected_distance",
|
57 | 58 | [
|
58 | 59 | (EMD(), 0.54726161),
|
59 |
| - (PSI(), 496.21968934), |
60 | 60 | (JS(), 0.81451218),
|
61 | 61 | (KL(), np.inf),
|
62 | 62 | (HistogramIntersection(), 0.97669491),
|
@@ -84,6 +84,71 @@ def test_batch_distance_based_univariate(
|
84 | 84 | assert np.isclose(distance, expected_distance)
|
85 | 85 |
|
86 | 86 |
|
| 87 | +@pytest.mark.parametrize( |
| 88 | + "detector, expected_distance", |
| 89 | + [(PSI(), 468.79410784), (Hellinger(), 0.77137797)], |
| 90 | +) |
| 91 | +def test_batch_distance_bins_based_univariate_different_distribution( |
| 92 | + univariate_distribution_p: Tuple[float, float], |
| 93 | + univariate_distribution_q: Tuple[float, float], |
| 94 | + detector: DataDriftBatchBase, |
| 95 | + expected_distance: float, |
| 96 | + num_samples: int = 500, |
| 97 | +) -> None: |
| 98 | + """Test bins-based distance detectors on different univariate distributions. |
| 99 | +
|
| 100 | + :param univariate_distribution_p: mean and standard deviation of distribution p |
| 101 | + :type univariate_distribution_p: Tuple[float, float] |
| 102 | + :param univariate_distribution_q: mean and standard deviation of distribution q |
| 103 | + :type univariate_distribution_q: Tuple[float, float] |
| 104 | + :param detector: detector distance |
| 105 | + :type detector: DataDriftBatchBase |
| 106 | + :param expected_distance: expected distance value |
| 107 | + :type expected_distance: float |
| 108 | + """ |
| 109 | + np.random.seed(seed=31) |
| 110 | + X_ref = np.random.normal(*univariate_distribution_p, size=num_samples) # noqa: N806 |
| 111 | + X_test = np.random.normal( # noqa: N806 |
| 112 | + *univariate_distribution_q, size=num_samples |
| 113 | + ) |
| 114 | + |
| 115 | + detector.fit(X=X_ref) |
| 116 | + distance = detector.compare(X=X_test) |
| 117 | + |
| 118 | + assert np.isclose(distance, expected_distance) |
| 119 | + |
| 120 | + |
| 121 | +@pytest.mark.parametrize( |
| 122 | + "detector, expected_distance", |
| 123 | + [(PSI(), 0.01840072), (Hellinger(), 0.04792538)], |
| 124 | +) |
| 125 | +def test_batch_distance_bins_based_univariate_same_distribution( |
| 126 | + univariate_distribution_p: Tuple[float, float], |
| 127 | + detector: DataDriftBatchBase, |
| 128 | + expected_distance: float, |
| 129 | + num_samples: int = 500, |
| 130 | +) -> None: |
| 131 | + """Test bins-based distance detectors on the same univariate distribution. |
| 132 | +
|
| 133 | + :param univariate_distribution_p: mean and standard deviation of distribution p |
| 134 | + :type univariate_distribution_p: Tuple[float, float] |
| 135 | + :param detector: detector distance |
| 136 | + :type detector: DataDriftBatchBase |
| 137 | + :param expected_distance: expected distance value |
| 138 | + :type expected_distance: float |
| 139 | + """ |
| 140 | + np.random.seed(seed=31) |
| 141 | + X_ref = np.random.normal(*univariate_distribution_p, size=num_samples) # noqa: N806 |
| 142 | + X_test = np.random.normal( # noqa: N806 |
| 143 | + *univariate_distribution_p, size=num_samples |
| 144 | + ) |
| 145 | + |
| 146 | + detector.fit(X=X_ref) |
| 147 | + distance = detector.compare(X=X_test) |
| 148 | + |
| 149 | + assert np.isclose(distance, expected_distance) |
| 150 | + |
| 151 | + |
87 | 152 | @pytest.mark.parametrize(
|
88 | 153 | "detector, expected_statistic, expected_p_value",
|
89 | 154 | [
|
|
0 commit comments