From 01b77d487c81a1bb9b41d8f085fde4d3c9a9b827 Mon Sep 17 00:00:00 2001 From: Andrii Kliachkin Date: Fri, 10 Nov 2023 20:35:31 +0100 Subject: [PATCH] Add methods for dealing with fairness in rankings (#461) --- aif360/algorithms/postprocessing/__init__.py | 1 + .../postprocessing/deterministic_reranking.py | 208 +++++ aif360/metrics/__init__.py | 1 + aif360/metrics/regression_metric.py | 100 +++ docs/source/modules/algorithms.rst | 1 + examples/demo_deterministic_reranking.ipynb | 734 ++++++++++++++++++ tests/test_deterministic_reranking.py | 80 ++ tests/test_regression_metric.py | 39 + 8 files changed, 1164 insertions(+) create mode 100644 aif360/algorithms/postprocessing/deterministic_reranking.py create mode 100644 aif360/metrics/regression_metric.py create mode 100644 examples/demo_deterministic_reranking.ipynb create mode 100644 tests/test_deterministic_reranking.py create mode 100644 tests/test_regression_metric.py diff --git a/aif360/algorithms/postprocessing/__init__.py b/aif360/algorithms/postprocessing/__init__.py index a47949a3..a9e13d86 100644 --- a/aif360/algorithms/postprocessing/__init__.py +++ b/aif360/algorithms/postprocessing/__init__.py @@ -1,3 +1,4 @@ from aif360.algorithms.postprocessing.calibrated_eq_odds_postprocessing import CalibratedEqOddsPostprocessing from aif360.algorithms.postprocessing.eq_odds_postprocessing import EqOddsPostprocessing from aif360.algorithms.postprocessing.reject_option_classification import RejectOptionClassification +from aif360.algorithms.postprocessing.deterministic_reranking import DeterministicReranking \ No newline at end of file diff --git a/aif360/algorithms/postprocessing/deterministic_reranking.py b/aif360/algorithms/postprocessing/deterministic_reranking.py new file mode 100644 index 00000000..5472ce0a --- /dev/null +++ b/aif360/algorithms/postprocessing/deterministic_reranking.py @@ -0,0 +1,208 @@ +import numpy as np +import pandas as pd + +from aif360.algorithms import Transformer +from aif360.datasets import StructuredDataset, RegressionDataset + +class DeterministicReranking(Transformer): + """A collection of algorithms for construction of fair ranked candidate lists. [1]_ . + + References: + .. [1] Sahin Cem Geyik, Stuart Ambler, and Krishnaram Kenthapadi, + "Fairness-Aware Ranking in Search & Recommendation Systems with Application to LinkedIn Talent Search," + KDD '19: Proceedings of the 25th ACM SIGKDD International Conference + on Knowledge Discovery & Data Mining, July 2019, Pages 2221-2231. + """ + + def __init__(self, + unprivileged_groups, + privileged_groups): + """ + Args: + unprivileged_groups (list(dict)): Representation for the unprivileged + group. + privileged_groups (list(dict)): Representation for the privileged + group. 
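+
+        Example:
+            An illustrative construction (the attribute name and values follow
+            the demo notebook added in this patch)::
+
+                dr = DeterministicReranking(
+                    unprivileged_groups=[{'color': 0}],
+                    privileged_groups=[{'color': 1}])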
+ """ + + super(DeterministicReranking, self).__init__( + unprivileged_groups=unprivileged_groups, + privileged_groups=privileged_groups) + + self.unprivileged_groups = unprivileged_groups + self.privileged_groups = privileged_groups + self._n_groups = len(unprivileged_groups) + len(privileged_groups) + self.s = set(unprivileged_groups[0].keys()) + self.s_vals = set(self.unprivileged_groups[0].values()).union(set(self.privileged_groups[0].values())) + + def fit(self, dataset: RegressionDataset): + if list(self.unprivileged_groups[0].keys())[0] != list(self.privileged_groups[0].keys())[0]: + raise ValueError("Different sensitive attributes (not values) specified for unprivileged and privileged groups.") + + items = dataset.convert_to_dataframe()[0] + items = items.sort_values(axis=0, by=dataset.label_names[0], ascending=False) + + if not(self.s.issubset(items.columns)): + raise ValueError(f"The dataset must contain the protected attribute(s): '{self.s}'.") + + # if we have just 1 protected attribute + if not isinstance(self.s, list) and False: + self._item_groups = [items[items[self.s] == ai] for ai in self.s_vals] + else: + self._item_groups = [] + for group in self.unprivileged_groups + self.privileged_groups: + q = ' & '.join( + [f'{s_i}=="{v_i}"' if isinstance(v_i, str) else f'{s_i}=={v_i}' + for s_i, v_i in group.items()] + ) + self._item_groups.append(items.query(q)) + + # self._item_groups = {ai: it for ai, it in zip( + # self.s_vals, [items[items[self.s] == ai] for ai in self.s_vals])} + + return self + + def predict(self, + dataset: RegressionDataset, + rec_size: int, + target_prop: list, + rerank_type: str='Constrained', + renormalize_scores: bool=False + ) -> RegressionDataset: + """Construct a ranking of candidates in the dataset according to specified proportions of groups. + + Args: + dataset (RegressionDataset): Dataset to rerank. + rec_size (int): Number of candidates in the output. + target_prop (list): Desired proportion of each group in the output. + rerank_type: Greedy, Conservative, Relaxed, or Constrained. Determines the type of algorithm \ + as described in the original paper. + renormalize_scores: renormalize label (score) values in the resulting ranking. If True, uses the default \ + behavior of RegressionDataset. + + Returns: + RegressionDataset: The reranked dataset. + """ + + + if rec_size <= 0: + raise ValueError(f"Output size should be greater than 0, got {rec_size}.") + # if np.any(set(target_prop.keys()) != set(self.s_vals)): + # raise ValueError("""Proportion specifications do not match. 
\ + # `target_prop` should have sensitive attribute values as keys.""") + if len(dataset.label_names) != 1: + raise ValueError(f"Dataset must have exactly one label, got {len(dataset.label_names)}.") + if rerank_type not in ['Greedy', 'Conservative', 'Relaxed', 'Constrained']: + raise ValueError(f'`rerank_type` must be one of `Greedy`, `Conservative`, `Relaxed`, `Constrained`; got {rerank_type}') + + # group_counts = {a: 0 for a in self.s_vals} + group_counts = [0] * self._n_groups + rankedItems = [] + score_label = dataset.label_names[0] + + if rerank_type != 'Constrained': + for k in range(1, rec_size+1): + below_min, below_max = [], [] + # get the best-scoring candidate item from each group + candidates = [ + candidates_gi.iloc[group_counts[g_i]] for g_i, candidates_gi in enumerate(self._item_groups) + ] + for group_idx in range(self._n_groups): + # best unranked items for each group + if group_counts[group_idx] < np.floor(k*target_prop[group_idx]): + below_min.append(group_idx) + elif group_counts[group_idx] < np.ceil(k*target_prop[group_idx]): + below_max.append(group_idx) + # if some groups are currently underrepresented + if len(below_min) != 0: + # choose the best next item among currently underrepresented groups + candidates_bmin = [candidates[group_idx] for group_idx in below_min] + next_group, next_item = max(enumerate(candidates_bmin), key = lambda x: x[1][score_label]) + # if minimal representation requirements are satisfied + else: + # if Greedy, add the highest scoring candidate among the groups + if rerank_type == 'Greedy': + candidates_bmax = [candidates[group_idx] for group_idx in below_max] + next_group, next_item = max(enumerate(candidates_bmax), key = lambda x: x[1][score_label]) + # if Conservative, add the candidate from the group least represented so far + elif rerank_type == 'Conservative': + # group_rep = [np.ceil(k*target_prop[group])/target_prop[group] for group in below_max] + # sort by how close the groups are to violating the condition, in case of tie sort by best element score + next_group = min(below_max, key=lambda group_idx: + (np.ceil(k*target_prop[group_idx])/target_prop[group_idx], + -candidates[group_idx][score_label])) + next_item = candidates[next_group] + # if Relaxed, relax the conservative requirements + elif rerank_type == 'Relaxed': + next_group = min(below_max, key=lambda group_idx: + (np.ceil(np.ceil(k*target_prop[group_idx])/target_prop[group_idx]), + -candidates[group_idx][score_label]) + ) + next_item = candidates[next_group] + + rankedItems.append(next_item) + group_counts[next_group] += 1 + + elif rerank_type == 'Constrained': + rankedItems, maxIndices = [], [] + group_counts, min_counts = [0] * self._n_groups, [0] * self._n_groups + + lastEmpty, k = 0, 0 + while lastEmpty < rec_size: + k+=1 + # determine the minimum feasible counts of each group at current rec. list size + min_counts_at_k = [np.floor(p_gi*k) for p_gi in target_prop] + # get sensitive attr. 
values for which the current minimum count has increased + # since last one + changed_mins = [] + for group_idx in range(self._n_groups): + if min_counts_at_k[group_idx] > min_counts[group_idx]: + changed_mins.append(group_idx) + + if len(changed_mins) > 0: + # get the list of candidates to insert and sort them by their score + changed_items = [] + # save the candidate AND the index of the group it belongs to + for group_idx in changed_mins: + changed_items.append((group_idx, self._item_groups[group_idx].iloc[group_counts[group_idx]])) + changed_items.sort(key=lambda x: -x[1][score_label]) + + # add the candidate items, starting with the best score + for newitem in changed_items: + if len(rankedItems) == rec_size: + break + maxIndices.append(k-1) + rankedItems.append(newitem[1]) + swapInd = lastEmpty + while swapInd > 0 and maxIndices[swapInd-1] >= swapInd and rankedItems[swapInd-1][score_label] < rankedItems[swapInd][score_label]: + maxIndices[swapInd-1], maxIndices[swapInd] = maxIndices[swapInd], maxIndices[swapInd-1] + rankedItems[swapInd-1], rankedItems[swapInd] = rankedItems[swapInd], rankedItems[swapInd-1] + swapInd -= 1 + lastEmpty+=1 + group_counts[newitem[0]] += 1 + min_counts = min_counts_at_k + + res_df = pd.DataFrame(rankedItems, columns=dataset.feature_names + [score_label]) + res = RegressionDataset(res_df, + dep_var_name=dataset.label_names[0], + protected_attribute_names=dataset.protected_attribute_names, + privileged_classes=dataset.privileged_protected_attributes) + if not renormalize_scores: + res.labels = np.transpose([res_df[score_label]]) + res.scores = np.transpose([res_df[score_label]]) + return res + + + def fit_predict(self, + dataset: RegressionDataset, + rec_size: int, + target_prop: dict, + rerank_type: str='Constrained', + renormalize_scores: bool=False + ) -> RegressionDataset: + self.fit(dataset=dataset) + return self.predict(dataset=dataset, + rec_size=rec_size, + target_prop=target_prop, + rerank_type=rerank_type, + renormalize_scores=renormalize_scores) \ No newline at end of file diff --git a/aif360/metrics/__init__.py b/aif360/metrics/__init__.py index 0a9338af..c913699b 100644 --- a/aif360/metrics/__init__.py +++ b/aif360/metrics/__init__.py @@ -4,3 +4,4 @@ from aif360.metrics.classification_metric import ClassificationMetric from aif360.metrics.sample_distortion_metric import SampleDistortionMetric from aif360.metrics.mdss_classification_metric import MDSSClassificationMetric +from aif360.metrics.regression_metric import RegressionDatasetMetric diff --git a/aif360/metrics/regression_metric.py b/aif360/metrics/regression_metric.py new file mode 100644 index 00000000..0ccf2185 --- /dev/null +++ b/aif360/metrics/regression_metric.py @@ -0,0 +1,100 @@ +import numpy as np +from aif360.metrics import DatasetMetric +from aif360.datasets import RegressionDataset + + +class RegressionDatasetMetric(DatasetMetric): + """Class for computing metrics based on a single + :obj:`~aif360.datasets.RegressionDataset`. + """ + + def __init__(self, dataset, unprivileged_groups=None, privileged_groups=None): + """ + Args: + dataset (RegressionDataset): A RegressionDataset. + privileged_groups (list(dict)): Privileged groups. Format is a list + of `dicts` where the keys are `protected_attribute_names` and + the values are values in `protected_attributes`. Each `dict` + element describes a single group. See examples for more details. + unprivileged_groups (list(dict)): Unprivileged groups in the same + format as `privileged_groups`. 
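+
+        Example:
+            An illustrative call (the group format mirrors the new unit tests)::
+
+                metric = RegressionDatasetMetric(dataset,
+                                                 unprivileged_groups=[{'s': 0}],
+                                                 privileged_groups=[{'s': 1}])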
+ + Raises: + TypeError: `dataset` must be a + :obj:`~aif360.datasets.RegressionDataset` type. + """ + if not isinstance(dataset, RegressionDataset): + raise TypeError("'dataset' should be a RegressionDataset") + + # sets self.dataset, self.unprivileged_groups, self.privileged_groups + super(RegressionDatasetMetric, self).__init__(dataset, + unprivileged_groups=unprivileged_groups, + privileged_groups=privileged_groups) + + def infeasible_index(self, target_prop: dict, r: int = None): + """ + Infeasible Index metric, as described in [1]_. + + Args: + target_prop (dict): desired proportion of groups. + r (int): size of the candidate list over which the metric is calculated. + Defaults to the size of the dataset. + + Returns: + A tuple (int, set{int}): InfeasibleIndex and the positions at which the + feasibility condition is violated. + + References: + .. [1] Sahin Cem Geyik, Stuart Ambler, and Krishnaram Kenthapadi, + "Fairness-Aware Ranking in Search & Recommendation Systems with Application to LinkedIn Talent Search," + KDD '19: Proceedings of the 25th ACM SIGKDD International Conference + on Knowledge Discovery & Data Mining, July 2019, Pages 2221-2231. + """ + pr_attr_values = np.ravel( + self.dataset.unprivileged_protected_attributes + self.dataset.privileged_protected_attributes) + if set(list(target_prop.keys())) != set(pr_attr_values): + raise ValueError('Desired proportions must be specified for all values of the protected attributes!') + + ranking = np.column_stack((self.dataset.scores, self.dataset.protected_attributes)) + if r is None: + r = np.ravel(self.dataset.scores).shape[0] + ii = 0 + k_viol = set() + for k in range(1, r): + rk = ranking[:k] + for ai in pr_attr_values: + count_ai = rk[rk[:,1] == ai].shape[0] + if count_ai < np.floor(target_prop[ai]*k): + ii+=1 + k_viol.add(k-1) + return ii, list(k_viol) + + def discounted_cum_gain(self, r: int = None, full_dataset: RegressionDataset=None, normalized=False): + """ + Discounted Cumulative Gain metric. + + Args: + r (int): position up to which to calculate the DCG. If not specified, is set to the size of the dataset. + normalized (bool): return normalized DCG. + + Returns: + The calculated DCG. 
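+
+        Note:
+            As implemented below, DCG@r = sum_{i=1..r} score_i / log2(i + 1),
+            where score_i is the score of the item at rank i. When `normalized`
+            is True, this value is divided by the DCG of the top-r scores of
+            `full_dataset`.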
+ """ + if r is None: + r = np.ravel(self.dataset.scores).shape[0] + if r < 0: + raise ValueError(f'r must be >= 0, got {r}') + if normalized == True and full_dataset is None: + raise ValueError('`normalized` is set to True, but `full_dataset` is not specified') + if not isinstance(full_dataset, RegressionDataset) and not (full_dataset is None): + raise TypeError(f'`full_datset`: expected `RegressionDataset`, got {type(full_dataset)}') + scores = np.ravel(self.dataset.scores)[:r] + z = self._dcg(scores) + if normalized: + z /= self._dcg(np.sort(np.ravel(full_dataset.scores))[::-1][:r]) + return z + + def _dcg(self, scores): + logs = np.log2(np.arange(2, len(scores)+2)) + z = np.sum(scores/logs) + return z diff --git a/docs/source/modules/algorithms.rst b/docs/source/modules/algorithms.rst index 740c4cb1..96a5ab0d 100644 --- a/docs/source/modules/algorithms.rst +++ b/docs/source/modules/algorithms.rst @@ -57,6 +57,7 @@ Algorithms algorithms.postprocessing.CalibratedEqOddsPostprocessing algorithms.postprocessing.EqOddsPostprocessing algorithms.postprocessing.RejectOptionClassification + algorithms.postprocessing.DeterministicReranking :mod:`aif360.algorithms` ======================== diff --git a/examples/demo_deterministic_reranking.ipynb b/examples/demo_deterministic_reranking.ipynb new file mode 100644 index 00000000..27d2b20b --- /dev/null +++ b/examples/demo_deterministic_reranking.ipynb @@ -0,0 +1,734 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### This notebook demonstrates the capabilities of the DeterministicReranking algorithm.\n", + "The algorithm provides a way to construct balanced rankings of candidates based on precalculated scores.\n", + "\n", + "*Based on: [Sahin Cem Geyik, Stuart Ambler, & Krishnaram Kenthapadi (2019). Fairness-Aware Ranking in Search & Recommendation Systems with Application to LinkedIn Talent Search. In Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining](https://doi.org/10.48550/arXiv.1905.01989).*\n", + "\n", + "The notebook is organized as follows:\n", + "1. Introduction;\n", + "2. A toy example;\n", + "3. Theoretical background." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 1. Introduction" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Ranking algorithms are at the core of search and recommendation systems used in, among others, hiring, college admissions, and web-searches. It is clear that algorithmic bias in such cases can create or amplify unacceptable discrimination based on race, gender, or other attributes. Thus, a way of constructing \"fair\" rankings is needed." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Algorithms presented here take as input a **dataset that has already been ranked** (scored), possibly by some other machine learning model, and order them in a way that satisfies specified **fairness requirements**, expressed in a form of a **distribution over the protected attributes**." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 2. A toy example" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's say we have a collection of 10 balls: 5 red and 5 blue. 
We assign each of them a score from 0 to 100; however, for some reason the red balls have a higher average score than the blue ones." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Red mean score: 73.0\n", + "Blue mean score: 50.0\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "balls = pd.DataFrame([['r', 100],['r', 90],['r', 85],['r', 70],['b', 70],['b', 60],['b', 50],['b', 40],['b', 30],['r', 20]],\n", + " columns=['color', 'score'])\n", + "\n", + "print(f\"Red mean score: {np.mean(balls[balls['color'] == 'r']['score'])}\")\n", + "print(f\"Blue mean score: {np.mean(balls[balls['color'] == 'b']['score'])}\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, say we want to take the 6 best balls based on their score. In real life, the need for this limited \"sub-ranking\" may arise for many reasons. For example, the landing page of our ball-selling website may only have space for 6 items. \n", + "\n", + "This is similar to the case presented in the original paper, where the algorithm is used to rank job-seekers for recruiters on LinkedIn." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
colorscore
0r100
1r90
2r85
3r70
4b70
5b60
\n", + "
" + ], + "text/plain": [ + " color score\n", + "0 r 100\n", + "1 r 90\n", + "2 r 85\n", + "3 r 70\n", + "4 b 70\n", + "5 b 60" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "balls.sort_values(by='score', ascending=False)[:6]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Of course, we notice that we have only 2 blue balls in this ranking, and it is in the last position! On one hand, it seems fair in terms of scores. However, we may want a more **equal representation** of different colors in the ranking. The possible motivations for that in real life are clear.\n", + "\n", + "In our case, the color is the **protected attribute**, and the red balls are a **privileged class**." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We may get a fairer ranking using the `DeterministicReranking` class:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:\n", + "`load_boston` has been removed from scikit-learn since version 1.2.\n", + "\n", + "The Boston housing prices dataset has an ethical problem: as\n", + "investigated in [1], the authors of this dataset engineered a\n", + "non-invertible variable \"B\" assuming that racial self-segregation had a\n", + "positive impact on house prices [2]. Furthermore the goal of the\n", + "research that led to the creation of this dataset was to study the\n", + "impact of air quality but it did not give adequate demonstration of the\n", + "validity of this assumption.\n", + "\n", + "The scikit-learn maintainers therefore strongly discourage the use of\n", + "this dataset unless the purpose of the code is to study and educate\n", + "about ethical issues in data science and machine learning.\n", + "\n", + "In this special case, you can fetch the dataset from the original\n", + "source::\n", + "\n", + " import pandas as pd\n", + " import numpy as np\n", + "\n", + " data_url = \"http://lib.stat.cmu.edu/datasets/boston\"\n", + " raw_df = pd.read_csv(data_url, sep=\"\\s+\", skiprows=22, header=None)\n", + " data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])\n", + " target = raw_df.values[1::2, 2]\n", + "\n", + "Alternative datasets include the California housing dataset and the\n", + "Ames housing dataset. You can load the datasets as follows::\n", + "\n", + " from sklearn.datasets import fetch_california_housing\n", + " housing = fetch_california_housing()\n", + "\n", + "for the California housing dataset and::\n", + "\n", + " from sklearn.datasets import fetch_openml\n", + " housing = fetch_openml(name=\"house_prices\", as_frame=True)\n", + "\n", + "for the Ames housing dataset.\n", + "\n", + "[1] M Carlisle.\n", + "\"Racist data destruction?\"\n", + "\n", + "\n", + "[2] Harrison Jr, David, and Daniel L. Rubinfeld.\n", + "\"Hedonic housing prices and the demand for clean air.\"\n", + "Journal of environmental economics and management 5.1 (1978): 81-102.\n", + "\n", + ": LawSchoolGPADataset will be unavailable. To install, run:\n", + "pip install 'aif360[LawSchoolGPA]'\n", + "WARNING:root:No module named 'tensorflow': AdversarialDebiasing will be unavailable. To install, run:\n", + "pip install 'aif360[AdversarialDebiasing]'\n", + "WARNING:root:No module named 'tensorflow': AdversarialDebiasing will be unavailable. 
To install, run:\n", + "pip install 'aif360[AdversarialDebiasing]'\n", + "WARNING:root:No module named 'fairlearn': ExponentiatedGradientReduction will be unavailable. To install, run:\n", + "pip install 'aif360[Reductions]'\n", + "WARNING:root:No module named 'fairlearn': GridSearchReduction will be unavailable. To install, run:\n", + "pip install 'aif360[Reductions]'\n", + "c:\\Users\\andre\\miniconda3\\envs\\aif\\lib\\site-packages\\torch\\_functorch\\deprecated.py:58: UserWarning: We've integrated functorch into PyTorch. As the final step of the integration, functorch.vmap is deprecated as of PyTorch 2.0 and will be deleted in a future version of PyTorch >= 2.3. Please use torch.vmap instead; see the PyTorch 2.0 release notes and/or the torch.func migration guide for more details https://pytorch.org/docs/master/func.migrating.html\n", + " warn_deprecated('vmap', 'torch.vmap')\n", + "WARNING:root:No module named 'fairlearn': GridSearchReduction will be unavailable. To install, run:\n", + "pip install 'aif360[Reductions]'\n" + ] + } + ], + "source": [ + "from aif360.datasets import RegressionDataset\n", + "from aif360.algorithms.postprocessing.deterministic_reranking import DeterministicReranking" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize a RegressionDataset with color as the protected attribute and red as the privileged class.\n", + "balls_ds = RegressionDataset(df=balls, dep_var_name='score', protected_attribute_names=['color'], privileged_classes=[['r']])\n", + "# keep the un-normalized scores for clarity; RegressionDataset normalizes them to be from 0 to 1.\n", + "balls_ds.labels = np.transpose([balls['score']])" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To initialize the DeterministicReranking class, we need to pass the **protected attribute values** of the privileged and unprivileged groups.\n", + "We do that using a list of dictionaries for each group, with the **name of the attribute as the key**. " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# The RegressionDataset class automatically maps the privileged attribute value (color='red') to 1 and the other to 0.\n", + "dr = DeterministicReranking(unprivileged_groups=[{'color': 0}], privileged_groups=[{'color': 1}])" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use the `fit_predict` method to get the ranking. The arguments are:\n", + "- `dataset` is the dataset to construct a ranking from;\n", + "- `rec_size` is the **size** of the ranking we need - in our case 6;\n", + "- `target_prop` is the **desired proportion** of items of each group in the ranking in the form of dictionary; the keys are the corresponding protected attribute values. We need equal representation, so we pass `{0: 0.5, 1: 0.5}`;\n", + "- `rerank_type` is the algorithm to use; for further details, skip to section 4 of this notebook. For now, we stick to the default `Constrained`;\n", + "- `renormalize_scores` will normalize the scores in the result so that the lowest is 0 and the highest is 1. Default is `False`." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
colorscore
01.0100.0
40.070.0
11.090.0
50.060.0
21.085.0
60.050.0
\n", + "
" + ], + "text/plain": [ + " color score\n", + "0 1.0 100.0\n", + "4 0.0 70.0\n", + "1 1.0 90.0\n", + "5 0.0 60.0\n", + "2 1.0 85.0\n", + "6 0.0 50.0" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fair_ranking = dr.fit_predict(dataset=balls_ds, rec_size=6, target_prop=[0.5, 0.5], rerank_type='Constrained')\n", + "fair_ranking.convert_to_dataframe()[0]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this result, the proportions are equal. Additionally, as the algorithm goes through positions in the ranking one-by-one, checking each time for violations of fairness, the items belonging to the unprivileged group (blue balls) **aren't all at the \"bottom\"** of the ranking." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can complicate the task a little by adding another protected attribute: let's call it `size`, which can be \"large\" or \"small\"." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
colorsizescore
01.01.01.0000
11.00.00.8750
21.01.00.8125
31.00.00.6250
40.01.00.6250
50.00.00.5000
60.01.00.3750
70.00.00.2500
80.01.00.1250
91.01.00.0000
\n", + "
" + ], + "text/plain": [ + " color size score\n", + "0 1.0 1.0 1.0000\n", + "1 1.0 0.0 0.8750\n", + "2 1.0 1.0 0.8125\n", + "3 1.0 0.0 0.6250\n", + "4 0.0 1.0 0.6250\n", + "5 0.0 0.0 0.5000\n", + "6 0.0 1.0 0.3750\n", + "7 0.0 0.0 0.2500\n", + "8 0.0 1.0 0.1250\n", + "9 1.0 1.0 0.0000" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "balls['size'] = ['l', 's', 'l', 's', 'l', 's', 'l', 's', 'l', 'l']\n", + "balls_ds = RegressionDataset(df=balls, dep_var_name='score',\n", + " protected_attribute_names=['color', 'size'],\n", + " privileged_classes=[['r'], ['l']])\n", + "balls_ds.convert_to_dataframe()[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's say that we want all possible combinations of the values to be represented equally in the output. We can do so by specifying more than one privileged/unprivileged group (the distinction between privileged and unprivileged groups isn't relevant in this algorihtm)." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
colorsizescore
01.01.01.0000
11.00.00.8750
40.01.00.6250
50.00.00.5000
21.01.00.8125
31.00.00.6250
\n", + "
" + ], + "text/plain": [ + " color size score\n", + "0 1.0 1.0 1.0000\n", + "1 1.0 0.0 0.8750\n", + "4 0.0 1.0 0.6250\n", + "5 0.0 0.0 0.5000\n", + "2 1.0 1.0 0.8125\n", + "3 1.0 0.0 0.6250" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dr = DeterministicReranking(unprivileged_groups=[{'color': 0, 'size': 0}, {'color': 0, 'size': 1}, {'color': 1, 'size': 0}],\n", + " privileged_groups=[{'color': 1, 'size': 1}])\n", + "fair_ranking = dr.fit_predict(dataset=balls_ds, rec_size=6, target_prop=[0.25, 0.25, 0.25, 0.25])\n", + "fair_ranking.convert_to_dataframe()[0]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 3. Variations of the algorithm" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `predict` and `fit_predict` methods of the algorithm include a `rerank_type` parameter. It refers to the different algorithms described in the original paper: **Greedy**, **Conservative**, **Relaxed** and **Constrained**. The difference between them lies in how, given a desired proportion of groups, they choose the next element in the ranking." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before getting into the details, we need to formalize the properties we want our ranking to satisfy:\n", + "- $\\forall k \\le |\\tau_r|, \\quad \\forall a \\in A: \\quad count_k(a) \\ge \\lfloor p_a*k \\rfloor$,\n", + "- $\\forall k \\le |\\tau_r|, \\quad \\forall a \\in A: \\quad count_k(a) \\le \\lceil p_a*k \\rceil$\n", + "\n", + "where $|\\tau_r|$ is the size of the ranking, $A$ is the set of groups, $count_k(a)$ is the number of elements of group $a$ in first $k$ elements of the ranking, and $p_a$ is the desired proportion of elements belonging to the group $a$ in the ranking.\n", + "\n", + "We refer to these properties as the minimum and the maximum representation constraints, respectively.\n", + "\n", + "The demand for the properties to be satisfied at every $k$ comes from the observation that the position of an element in the ranking can have a significant impact on the response of the user." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The **Greedy** variant works as follows:\n", + "- If there are any groups for which the minimum representation constraint is violated (the proportion of the group in the ranking is too small), choose the element with the highest score among those groups;\n", + "- Otherwise, choose the element with the highest score among groups that do not violate the maximum constraint (i.e. aren't overrepresented)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The **Conservative** and **Relaxed** variants give preference to underrepresented groups:\n", + "- If there are any groups for which the minimum representation constraint is violated, choose the element with the highest score among those groups (analogous to Greedy)\n", + "- Otherwise, among groups that do not violate the maximum constraint, pick the group that minimizes $\\frac{\\lceil p_a*k \\rceil}{p_a}$ if Conservative or $\\lceil\\frac{\\lceil p_a*k \\rceil}{p_a}\\rceil$ if Relaxed. From this group, choose the element with the highest score." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The **Constrained** variant differs significantly from both of the previous ones:\n", + "- Starting with 0, increase the value of *k* until the minimum representation constraint is increased for at least one group. If there are more than one such groups, order them according to the descending score of their highest-scoring candidates not yet in the ranking.\n", + "- For each group in the list above:\n", + " 1. Insert the next candidate from the group to the next empty index in the ranking\n", + " 2. Swap the candidate towards earlier indices until:\n", + " - Either the score of the candidate in the earlier index is larger, or,\n", + " - Swapping will violate the minimum condition for the group of the candidate in the earlier index." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Experimental results (see reference) show that all algorithms exhibit similar performance in terms of both list utility and fairness. The Greedy algorithm generates ranked lists with higher utility, but doesn't strictly adhere to the fairness constraints; among the rest, Constrained shows the best utility." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "aif", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/test_deterministic_reranking.py b/tests/test_deterministic_reranking.py new file mode 100644 index 00000000..5628eb77 --- /dev/null +++ b/tests/test_deterministic_reranking.py @@ -0,0 +1,80 @@ +import unittest +import pandas as pd +from aif360.datasets import RegressionDataset +from aif360.algorithms.postprocessing.deterministic_reranking import DeterministicReranking + +dataset = RegressionDataset(pd.DataFrame([ + ['r', 100], + ['r', 90], + ['r', 80], + ['r', 70], + ['b', 60], + ['b', 50], + ['b', 40], + ['b', 30], + ['b', 20], + ['r', 10], +], columns=['s', 'score']), dep_var_name='score', protected_attribute_names=['s'], privileged_classes=[['r']]) + +class TestInputValidation(unittest.TestCase): + def test_one_group(self): + with self.assertRaises(Exception): + d = DeterministicReranking([{'a': 0}, {'b': 0}], [{'a': 0}, {'b': 0}]) + d.fit(dataset) + def test_diff_attr_names(self): + with self.assertRaises(Exception): + d = DeterministicReranking([{'a': 0}], [{'b': 0}]) + d.fit(dataset) + def test_rec_size(self): + with self.assertRaises(Exception): + d = DeterministicReranking([{'a': 0}], [{'b': 0}]) + d.fit(dataset) + d.predict(dataset, rec_size=-1, target_prop=[0.5, 0.5]) + def test_prop_len(self): + with self.assertRaises(Exception): + d = DeterministicReranking([{'a': 0}], [{'b': 0}]) + d.fit(dataset) + d.predict(dataset, rec_size=1, target_prop=[0.5]) + +class TestValues(unittest.TestCase): + def __init__(self, methodName: str = "runTest") -> None: + + self.d = DeterministicReranking(privileged_groups=[{'s': 1}], unprivileged_groups=[{'s': 0}]) + self.d.fit(dataset) + super().__init__(methodName) + + def test_wrong_type(self): + with self.assertRaises(ValueError): + self.d.predict( + dataset, rec_size=6, target_prop=[0.5, 0.5], rerank_type='WRONG').convert_to_dataframe()[0] + + def test_greedy(self): + ds = self.d.predict( + dataset, rec_size=6, 
target_prop=[0.5, 0.5], rerank_type='Greedy').convert_to_dataframe()[0]
+        actual = len(ds[ds['s'] == 1])/len(ds)
+        expected = 0.5
+        assert actual == expected
+
+    def test_conserv(self):
+        ds = self.d.predict(
+            dataset, rec_size=6, target_prop=[0.5, 0.5], rerank_type='Conservative').convert_to_dataframe()[0]
+        actual = len(ds[ds['s'] == 1])/len(ds)
+        expected = 0.5
+        assert actual == expected
+
+    def test_relaxed(self):
+        ds = self.d.predict(
+            dataset, rec_size=6, target_prop=[0.5, 0.5], rerank_type='Relaxed').convert_to_dataframe()[0]
+        actual = len(ds[ds['s'] == 1])/len(ds)
+        expected = 0.5
+        assert actual == expected
+
+    def test_constrained(self):
+        ds = self.d.predict(
+            dataset, rec_size=6, target_prop=[0.5, 0.5], rerank_type='Constrained').convert_to_dataframe()[0]
+        actual = len(ds[ds['s'] == 1])/len(ds)
+        expected = 0.5
+        assert actual == expected
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file
diff --git a/tests/test_regression_metric.py b/tests/test_regression_metric.py
new file mode 100644
index 00000000..7829e660
--- /dev/null
+++ b/tests/test_regression_metric.py
@@ -0,0 +1,39 @@
+from aif360.metrics import RegressionDatasetMetric
+from aif360.datasets import RegressionDataset
+import numpy as np
+import pandas as pd
+
+df = pd.DataFrame([
+    ['r', 55],
+    ['b', 65],
+    ['b', 85],
+    ['b', 70],
+    ['r', 60],
+    ['r', 50],
+    ['r', 40],
+    ['b', 30],
+    ['r', 20],
+    ['b', 10],
+], columns=['s', 'score'])
+
+dataset = RegressionDataset(df, dep_var_name='score', protected_attribute_names=['s'], privileged_classes=[['r']])
+# sorted_dataset = RegressionDataset(df, dep_var_name='score', protected_attribute_names=['s'], privileged_classes=[['r']])
+
+
+m = RegressionDatasetMetric(dataset=dataset,
+                            privileged_groups=[{'s': 1}],
+                            unprivileged_groups=[{'s': 0}])
+
+def test_infeasible_index():
+    actual = m.infeasible_index(target_prop={1: 0.5, 0: 0.5}, r=10)
+    expected = (1, [3])
+    assert actual == expected, f'Infeasible Index calculated wrong, got {actual}, expected {expected}'
+
+def test_dcg():
+    actual = m.discounted_cum_gain(normalized=False)
+    expected = 2.6126967369231484
+    assert abs(actual - expected) < 1e-6
+
+def test_ndcg():
+    actual = m.discounted_cum_gain(normalized=True, full_dataset=dataset)
+    expected = 0.9205433036318259
+    assert abs(actual - expected) < 1e-6
\ No newline at end of file
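Reviewer note: a minimal end-to-end sketch (not part of the diff) showing how the two additions in this patch can be combined, assuming the patch is applied; the toy data and group encoding mirror tests/test_deterministic_reranking.py.

import pandas as pd

from aif360.algorithms.postprocessing import DeterministicReranking
from aif360.datasets import RegressionDataset
from aif360.metrics import RegressionDatasetMetric

# Toy scored candidates; RegressionDataset maps 'r' to 1 (privileged) and 'b' to 0.
df = pd.DataFrame([
    ['r', 100], ['r', 90], ['r', 80], ['r', 70], ['b', 60],
    ['b', 50], ['b', 40], ['b', 30], ['b', 20], ['r', 10],
], columns=['s', 'score'])
ds = RegressionDataset(df, dep_var_name='score',
                       protected_attribute_names=['s'],
                       privileged_classes=[['r']])

# Build a top-6 list with a 50/50 target proportion per group.
dr = DeterministicReranking(unprivileged_groups=[{'s': 0}],
                            privileged_groups=[{'s': 1}])
ranking = dr.fit_predict(dataset=ds, rec_size=6,
                         target_prop=[0.5, 0.5], rerank_type='Constrained')

# Audit the result with the new metric: an InfeasibleIndex of 0 means no checked
# prefix of the list falls below the minimum representation target.
metric = RegressionDatasetMetric(ranking,
                                 unprivileged_groups=[{'s': 0}],
                                 privileged_groups=[{'s': 1}])
print(metric.infeasible_index(target_prop={1: 0.5, 0: 0.5}))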