diff --git a/aif360/detectors/mdss/MDSS.py b/aif360/detectors/mdss/MDSS.py index 9d73f18c..fcd1c757 100644 --- a/aif360/detectors/mdss/MDSS.py +++ b/aif360/detectors/mdss/MDSS.py @@ -220,6 +220,14 @@ def scan(self, coordinates: pd.DataFrame, expectations: pd.Series, outcomes: pd. """ np.random.seed(seed) + # Reset indexes + coordinates = coordinates.reset_index(drop = True) + expectations = expectations.reset_index(drop = True) + outcomes = outcomes.reset_index(drop = True) + + assert len(coordinates) == len(expectations) == len(outcomes), \ + f'Lengths of coordinates, expectations, and outcomes should be equal.' + # Check that the appropriate scoring function is used if isinstance(self.scoring_function, BerkJones): diff --git a/aif360/detectors/mdss/ScoringFunctions/BerkJones.py b/aif360/detectors/mdss/ScoringFunctions/BerkJones.py index e39cb6e9..e09c5479 100644 --- a/aif360/detectors/mdss/ScoringFunctions/BerkJones.py +++ b/aif360/detectors/mdss/ScoringFunctions/BerkJones.py @@ -41,12 +41,6 @@ def score(self, observed_sum: float, expectations: np.array, penalty: float, q: """ alpha = self.alpha - key = tuple([observed_sum, len(expectations), penalty, q, alpha]) - ans = self.score_cache.get(key) - if ans is not None: - self.cache_counter['score'] += 1 - return ans - if q < alpha: q = alpha @@ -57,7 +51,6 @@ def score(self, observed_sum: float, expectations: np.array, penalty: float, q: ) if q == 1: ans = observed_sum * np.log(q / alpha) - penalty - self.score_cache[key] = ans return ans a = observed_sum * np.log(q / alpha) @@ -68,7 +61,6 @@ def score(self, observed_sum: float, expectations: np.array, penalty: float, q: - penalty ) - self.score_cache[key] = ans return ans def qmle(self, observed_sum: float, expectations: np.array): @@ -81,24 +73,15 @@ def qmle(self, observed_sum: float, expectations: np.array): :return: q MLE """ alpha = self.alpha - - key = tuple([observed_sum, len(expectations), alpha]) - ans = self.qmle_cache.get(key) - if ans is not None: - self.cache_counter['qmle'] += 1 - return ans if len(expectations) == 0: - self.qmle_cache[key] = 0 return 0 else: q = observed_sum / len(expectations) if (q < alpha): - self.qmle_cache[key] = alpha return alpha - self.qmle_cache[key] = q return q def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float): @@ -110,13 +93,6 @@ def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float :param penalty: penalty coefficient """ alpha = self.alpha - - key = tuple([observed_sum, len(expectations), penalty, alpha]) - ans = self.compute_qs_cache.get(key) - if ans is not None: - self.cache_counter['qs'] += 1 - return ans - q_mle = self.qmle(observed_sum, expectations) if self.score(observed_sum, expectations, penalty, q_mle) > 0: @@ -134,5 +110,4 @@ def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float q_max = 0 ans = [exist, q_mle, q_min, q_max] - self.compute_qs_cache[key] = ans return ans diff --git a/aif360/detectors/mdss/ScoringFunctions/Bernoulli.py b/aif360/detectors/mdss/ScoringFunctions/Bernoulli.py index be3358eb..1da368fc 100644 --- a/aif360/detectors/mdss/ScoringFunctions/Bernoulli.py +++ b/aif360/detectors/mdss/ScoringFunctions/Bernoulli.py @@ -33,14 +33,7 @@ def score(self, observed_sum: float, expectations: np.array, penalty: float, q: % (observed_sum, len(expectations), penalty, q) ) - key = tuple([observed_sum, expectations.tostring(), penalty, q]) - ans = self.score_cache.get(key) - if ans is not None: - self.cache_counter['score'] += 1 - return ans - ans = observed_sum * np.log(q) - np.log(1 - expectations + q * expectations).sum() - penalty - self.score_cache[key] = ans return ans def qmle(self, observed_sum: float, expectations: np.array): @@ -50,16 +43,8 @@ def qmle(self, observed_sum: float, expectations: np.array): :param observed_sum: sum of observed binary outcomes for all i :param expectations: predicted outcomes for each data element i """ - direction = self.direction - - key = tuple([observed_sum, expectations.tostring()]) - ans = self.qmle_cache.get(key) - if ans is not None: - self.cache_counter['qmle'] += 1 - return ans - + direction = self.direction ans = optim.bisection_q_mle(self, observed_sum, expectations, direction=direction) - self.qmle_cache[key] = ans return ans def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float): @@ -71,13 +56,6 @@ def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float :param penalty: penalty coefficient """ direction = self.direction - - key = tuple([observed_sum, expectations.tostring(), penalty]) - ans = self.compute_qs_cache.get(key) - if ans is not None: - self.cache_counter['qs'] += 1 - return ans - q_mle = self.qmle(observed_sum, expectations) if self.score(observed_sum, expectations, penalty, q_mle) > 0: @@ -95,7 +73,6 @@ def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float exist, q_min, q_max = optim.direction_assertions(direction, q_min, q_max) ans = [exist, q_mle, q_min, q_max] - self.compute_qs_cache[key] = ans return ans def q_dscore(self, observed_sum:float, expectations:np.array, q:float): @@ -110,12 +87,5 @@ def q_dscore(self, observed_sum:float, expectations:np.array, q:float): :param q: current value of q :return: q dscore/dq """ - key = tuple([observed_sum, expectations.tostring(), q]) - ans = self.qdscore_cache.get(key) - if ans is not None: - self.cache_counter['qdscore'] += 1 - return ans - ans = observed_sum - (q * expectations / (1 - expectations + q * expectations)).sum() - self.qdscore_cache[key] = ans return ans diff --git a/aif360/detectors/mdss/ScoringFunctions/Gaussian.py b/aif360/detectors/mdss/ScoringFunctions/Gaussian.py index a385ba7d..af6cf82f 100644 --- a/aif360/detectors/mdss/ScoringFunctions/Gaussian.py +++ b/aif360/detectors/mdss/ScoringFunctions/Gaussian.py @@ -30,12 +30,6 @@ def score( :return: bias score for the current value of q """ - key = tuple([observed_sum, expectations.sum(), penalty, q]) - ans = self.score_cache.get(key) - if ans is not None: - self.cache_counter["score"] += 1 - return ans - assumed_var = self.var expected_sum = expectations.sum() penalty /= self.var @@ -56,7 +50,6 @@ def score( ans = 0 ans -= penalty - self.score_cache[key] = ans return ans @@ -64,12 +57,6 @@ def qmle(self, observed_sum: float, expectations: np.array): """ Computes the q which maximizes score (q_mle). """ - key = tuple([observed_sum, expectations.sum()]) - ans = self.qmle_cache.get(key) - if ans is not None: - self.cache_counter["qmle"] += 1 - return ans - expected_sum = expectations.sum() # Deals with case where observed_sum = expected_sum = 0 @@ -78,8 +65,7 @@ def qmle(self, observed_sum: float, expectations: np.array): else: ans = observed_sum / expected_sum - assert np.isnan(ans) == False, f'{expected_sum}, {observed_sum}, {ans}' - self.qmle_cache[key] = ans + assert np.isnan(ans) == False, f'{expected_sum}, {observed_sum}, {ans}' return ans def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float): @@ -94,13 +80,6 @@ def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float direction = self.direction q_mle = self.qmle(observed_sum, expectations) - - key = tuple([observed_sum, expectations.sum(), penalty]) - ans = self.compute_qs_cache.get(key) - if ans is not None: - self.cache_counter["qs"] += 1 - return ans - q_mle_score = self.score(observed_sum, expectations, penalty, q_mle) if q_mle_score > 0: @@ -118,5 +97,4 @@ def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float exist, q_min, q_max = optim.direction_assertions(direction, q_min, q_max) ans = [exist, q_mle, q_min, q_max] - self.compute_qs_cache[key] = ans return ans diff --git a/aif360/detectors/mdss/ScoringFunctions/Poisson.py b/aif360/detectors/mdss/ScoringFunctions/Poisson.py index ff10e81a..02735b62 100644 --- a/aif360/detectors/mdss/ScoringFunctions/Poisson.py +++ b/aif360/detectors/mdss/ScoringFunctions/Poisson.py @@ -32,14 +32,8 @@ def score(self, observed_sum: float, expectations: np.array, penalty: float, q: "observed_sum=%.2f, expectations of length=%d, penalty=%.2f, q=%.2f" % (observed_sum, len(expectations), penalty, q) ) - key = tuple([observed_sum, expectations.sum(), penalty, q]) - ans = self.score_cache.get(key) - if ans is not None: - self.cache_counter['score'] += 1 - return ans ans = observed_sum * np.log(q) + (expectations - q * expectations).sum() - penalty - self.score_cache[key] = ans return ans def qmle(self, observed_sum: float, expectations: np.array): @@ -47,15 +41,7 @@ def qmle(self, observed_sum: float, expectations: np.array): Computes the q which maximizes score (q_mle). """ direction = self.direction - - key = tuple([observed_sum, expectations.sum()]) - ans = self.qmle_cache.get(key) - if ans is not None: - self.cache_counter['qmle'] += 1 - return ans - ans = optim.bisection_q_mle(self, observed_sum, expectations, direction=direction) - self.qmle_cache[key] = ans return ans def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float): @@ -68,15 +54,8 @@ def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float """ direction = self.direction - q_mle = self.qmle(observed_sum, expectations) - key = tuple([observed_sum, expectations.tostring(), penalty]) - ans = self.compute_qs_cache.get(key) - if ans is not None: - self.cache_counter['qs'] += 1 - return ans - if self.score(observed_sum, expectations, penalty, q_mle) > 0: exist = 1 q_min = optim.bisection_q_min(self, observed_sum, expectations, penalty, q_mle) @@ -92,7 +71,6 @@ def compute_qs(self, observed_sum: float, expectations: np.array, penalty: float exist, q_min, q_max = optim.direction_assertions(direction, q_min, q_max) ans = [exist, q_mle, q_min, q_max] - self.compute_qs_cache[key] = ans return ans def q_dscore(self, observed_sum, expectations, q): @@ -107,12 +85,5 @@ def q_dscore(self, observed_sum, expectations, q): :param q: current value of q :return: q dscore/dq """ - key = tuple([observed_sum, expectations.sum(), q]) - ans = self.qdscore_cache.get(key) - if ans is not None: - self.cache_counter['qdscore'] += 1 - return ans - ans = observed_sum - (q * expectations).sum() - self.qdscore_cache[key] = ans return ans diff --git a/aif360/detectors/mdss/ScoringFunctions/ScoringFunction.py b/aif360/detectors/mdss/ScoringFunctions/ScoringFunction.py index ec7c0672..f000e28b 100644 --- a/aif360/detectors/mdss/ScoringFunctions/ScoringFunction.py +++ b/aif360/detectors/mdss/ScoringFunctions/ScoringFunction.py @@ -15,20 +15,11 @@ def __init__(self, **kwargs): Journal of Computational and Graphical Statistics, 25(2), 382-404. """ self.kwargs = kwargs - self._reset() self.direction = kwargs.get('direction') directions = ['positive', 'negative'] assert self.direction in directions, f"Expected one of {directions}, got {self.direction}" - def _reset(self): - self.score_cache = {} - self.dscore_cache = {} - self.qdscore_cache = {} - self.qmle_cache = {} - self.compute_qs_cache = {} - self.cache_counter = {"score": 0, "dscore": 0, "qdscore": 0, "qmle": 0, "qs": 0} - def score( self, observed_sum: float, expectations: np.array, penalty: float, q: float ): diff --git a/examples/demo_mdss_detector.ipynb b/examples/demo_mdss_detector.ipynb index 465abbed..f9144eaf 100644 --- a/examples/demo_mdss_detector.ipynb +++ b/examples/demo_mdss_detector.ipynb @@ -348,7 +348,7 @@ { "data": { "text/plain": [ - "'Our detected priviledged group has a size of 147, we observe 0.5374149659863946 as the average risk of recidivism, but our model predicts 0.3827815971689547'" + "'Our detected priviledged group has a size of 147, we observe 0.5374149659863946 as the average risk of recidivism, but our model predicts 0.38278159716895366'" ] }, "execution_count": 12, @@ -379,7 +379,7 @@ { "data": { "text/plain": [ - "'Our detected priviledged group has a size of 732, we observe 0.3770491803278688 as the average risk of recidivism, but our model predicts 0.44470388217799317'" + "'Our detected priviledged group has a size of 732, we observe 0.3770491803278688 as the average risk of recidivism, but our model predicts 0.4447038821779929'" ] }, "execution_count": 14, @@ -784,7 +784,7 @@ { "data": { "text/plain": [ - "'Our detected privileged group has a size of 321, we observe 7844.840295856697 as the mean insurance costs, but our model predicts 5420.49326277455'" + "'Our detected privileged group has a size of 321, we observe 7844.8402958566985 as the mean insurance costs, but our model predicts 5420.493262774548'" ] }, "execution_count": 28, @@ -809,7 +809,7 @@ { "data": { "text/plain": [ - "'Our detected privileged group has a size of 115, we observe 21148.37389617392 as the mean insurance costs, but our model predicts 29694.035319112852'" + "'Our detected privileged group has a size of 115, we observe 21148.373896173915 as the mean insurance costs, but our model predicts 29694.035319112845'" ] }, "execution_count": 29, @@ -1152,7 +1152,7 @@ { "data": { "text/plain": [ - "'Our detected privileged group has a size of 31607, we observe 5.155584909121915 as the mean temperature, but our model predicts 11.932678437519867'" + "'Our detected privileged group has a size of 31607, we observe 5.155584909121934 as the mean temperature, but our model predicts 11.93267843751985'" ] }, "execution_count": 42, @@ -1176,7 +1176,7 @@ { "data": { "text/plain": [ - "'Our detected unprivileged group has a size of 55642, we observe 16.773802762911167 as the mean temperature, but our model predicts 11.932678437519867'" + "'Our detected unprivileged group has a size of 55642, we observe 16.773802762911078 as the mean temperature, but our model predicts 11.93267843751985'" ] }, "execution_count": 43, @@ -1533,7 +1533,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.8.12" } }, "nbformat": 4,