move logic from __init__ to allow clone
Signed-off-by: Samuel Hoffman <hoffman.sc@gmail.com>
hoffmansc committed Jul 14, 2022
1 parent 17f0aba commit b198f86
Showing 2 changed files with 99 additions and 86 deletions.
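
Editor's note: this commit follows the scikit-learn estimator contract, under which `__init__` stores its arguments verbatim and all validation and derived objects are built in `fit`, with fitted attributes given a trailing underscore. `sklearn.base.clone` and `set_params` rely on that contract, since they rebuild or mutate an estimator purely through its constructor parameters. A minimal sketch of the failure mode being fixed (the classes below are illustrative, not part of aif360):

from sklearn.base import BaseEstimator, clone


class EagerReduction(BaseEstimator):
    """Builds derived state in __init__, the pattern this commit removes."""

    def __init__(self, constraints="DemographicParity"):
        self.constraints = constraints
        self.moment = constraints.upper()  # derived in __init__: goes stale


class LazyReduction(BaseEstimator):
    """Stores parameters verbatim; derives state in fit, as the commit does."""

    def __init__(self, constraints="DemographicParity"):
        self.constraints = constraints  # store only, no validation or logic

    def fit(self, X, y):
        self.moment_ = self.constraints.upper()  # rebuilt on every fit
        return self


# set_params after clone silently leaves the eagerly built state stale:
eager = clone(EagerReduction()).set_params(constraints="EqualizedOdds")
print(eager.constraints, eager.moment)   # EqualizedOdds DEMOGRAPHICPARITY

lazy = clone(LazyReduction()).set_params(constraints="EqualizedOdds")
print(lazy.fit(None, None).moment_)      # EQUALIZEDODDS

The renaming of `self.model`, `self.moment`, and friends to `self.model_`, `self.moment_` in the diffs below is the same convention: anything derived from the parameters is fitted state, not configuration.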
71 changes: 38 additions & 33 deletions aif360/sklearn/inprocessing/exponentiated_gradient_reduction.py
@@ -5,11 +5,10 @@
 licensed under the MIT License, Copyright Microsoft Corporation
 """
 import fairlearn.reductions as red
-import numpy as np
-from sklearn.base import BaseEstimator, ClassifierMixin
+from sklearn.base import BaseEstimator, ClassifierMixin, clone
 from sklearn.preprocessing import LabelEncoder
 
-from aif360.sklearn.utils import check_inputs
+from aif360.sklearn.utils import check_inputs, check_groups
 
 
 class ExponentiatedGradientReduction(BaseEstimator, ClassifierMixin):
@@ -27,18 +26,16 @@ class ExponentiatedGradientReduction(BaseEstimator, ClassifierMixin):
         <https://arxiv.org/abs/1803.02453>`_
     """
     def __init__(self,
-                 prot_attr,
                  estimator,
                  constraints,
+                 prot_attr=None,
                  eps=0.01,
                  T=50,
                  nu=None,
                  eta_mul=2.0,
                  drop_prot_attr=True):
         """
         Args:
-            prot_attr: String or array-like column indices or column names of
-                protected attributes.
             estimator: An estimator implementing methods ``fit(X, y,
                 sample_weight)`` and ``predict(X)``, where ``X`` is the matrix
                 of features, ``y`` is the vector of labels, and
@@ -52,6 +49,11 @@ def __init__(self,
                 `self.model.moments`. Otherwise, provide the desired
                 :class:`~fairlearn.reductions.Moment` object defining the
                 disparity constraints.
+            prot_attr (single label or list-like, optional): Protected
+                attribute(s) to use in the reduction process. If more than one
+                attribute, all combinations of values (intersections) are
+                considered. Default is ``None`` meaning all protected attributes
+                from the dataset are used.
             eps: Allowed fairness constraint violation; the solution is
                 guaranteed to have the error within ``2*best_gap`` of the best
                 error under constraint eps; the constraint violation is at most
@@ -64,34 +66,15 @@
             drop_prot_attr: Boolean flag indicating whether to drop protected
                 attributes from training data.
         """
-        self.prot_attr = prot_attr
-        self.moments = {
-            "DemographicParity": red.DemographicParity,
-            "EqualizedOdds": red.EqualizedOdds,
-            "TruePositiveRateDifference": red.TruePositiveRateDifference,
-            "ErrorRateRatio": red.ErrorRateRatio
-        }
-
-        if isinstance(constraints, str):
-            if constraints not in self.moments:
-                raise ValueError(f"Constraint not recognized: {constraints}")
-
-            self.moment = self.moments[constraints]()
-        elif isinstance(constraints, red.Moment):
-            self.moment = constraints
-        else:
-            raise ValueError("constraints must be a string or Moment object.")
-
         self.estimator = estimator
         self.constraints = constraints
+        self.prot_attr = prot_attr
         self.eps = eps
         self.T = T
         self.nu = nu
         self.eta_mul = eta_mul
         self.drop_prot_attr = drop_prot_attr
 
-        self.model = red.ExponentiatedGradient(self.estimator, self.moment,
-            self.eps, self.T, self.nu, self.eta_mul)
-
     def fit(self, X, y):
         """Learns randomized model with less bias
@@ -102,16 +85,38 @@ def fit(self, X, y):
         Returns:
             self
         """
-        A = X[self.prot_attr]
+        X, y, _ = check_inputs(X, y)
+        _, self.prot_attr_ = check_groups(X, self.prot_attr)
+        self.estimator_ = clone(self.estimator)
+
+        moments = {
+            "DemographicParity": red.DemographicParity,
+            "EqualizedOdds": red.EqualizedOdds,
+            "TruePositiveRateDifference": red.TruePositiveRateDifference,
+            "ErrorRateRatio": red.ErrorRateRatio
+        }
+        if isinstance(self.constraints, str):
+            if self.constraints not in moments:
+                raise ValueError(f"Constraint not recognized: {self.constraints}")
+            self.moment_ = moments[self.constraints]()
+        elif isinstance(self.constraints, red.Moment):
+            self.moment_ = self.constraints
+        else:
+            raise ValueError("constraints must be a string or Moment object.")
+
+        self.model_ = red.ExponentiatedGradient(self.estimator_, self.moment_,
+            eps=self.eps, T=self.T, nu=self.nu, eta_mul=self.eta_mul)
+
+        A = X[self.prot_attr_]
 
         if self.drop_prot_attr:
-            X = X.drop(self.prot_attr, axis=1)
+            X = X.drop(self.prot_attr_, axis=1)
 
         le = LabelEncoder()
         y = le.fit_transform(y)
         self.classes_ = le.classes_
 
-        self.model.fit(X, y, sensitive_features=A)
+        self.model_.fit(X, y, sensitive_features=A)
 
         return self

@@ -124,9 +129,9 @@ def predict(self, X):
             numpy.ndarray: Predicted class label per sample.
         """
         if self.drop_prot_attr:
-            X = X.drop(self.prot_attr, axis=1)
+            X = X.drop(self.prot_attr_, axis=1)
 
-        return self.classes_[self.model.predict(X)]
+        return self.classes_[self.model_.predict(X)]
 
 
     def predict_proba(self, X):
@@ -144,6 +149,6 @@ def predict_proba(self, X):
             ``self.classes_``.
         """
         if self.drop_prot_attr:
-            X = X.drop(self.prot_attr, axis=1)
+            X = X.drop(self.prot_attr_, axis=1)
 
         return self.model._pmf_predict(X)
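
Editor's note: with construction deferred to `fit`, the reduction now survives the clone/set_params round trip that meta-estimators such as `GridSearchCV` perform internally. A hedged usage sketch (the `LogisticRegression` base estimator and the "sex" protected attribute are assumed examples, not part of the commit):

from sklearn.base import clone
from sklearn.linear_model import LogisticRegression

from aif360.sklearn.inprocessing import ExponentiatedGradientReduction

egr = ExponentiatedGradientReduction(estimator=LogisticRegression(),
                                     constraints="EqualizedOdds",
                                     prot_attr="sex")

# Cloning re-runs __init__ with get_params(); since __init__ now only
# stores its arguments, the copy is faithful and independent.
egr_copy = clone(egr).set_params(constraints="DemographicParity")
assert egr.constraints == "EqualizedOdds"
assert egr_copy.constraints == "DemographicParity"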
114 changes: 61 additions & 53 deletions aif360/sklearn/inprocessing/grid_search_reduction.py
@@ -5,9 +5,9 @@
 licensed under the MIT License, Copyright Microsoft Corporation
 """
 import fairlearn.reductions as red
-import numpy as np
-from sklearn.base import BaseEstimator, ClassifierMixin
-from sklearn.preprocessing import LabelEncoder
+from sklearn.base import BaseEstimator, ClassifierMixin, clone
+
+from aif360.sklearn.utils import check_inputs, check_groups
 
 
 class GridSearchReduction(BaseEstimator, ClassifierMixin):
@@ -33,9 +33,9 @@ class GridSearchReduction(BaseEstimator, ClassifierMixin):
         <https://arxiv.org/abs/1905.12843>`_
     """
     def __init__(self,
-                 prot_attr,
                  estimator,
                  constraints,
+                 prot_attr=None,
                  constraint_weight=0.5,
                  grid_size=10,
                  grid_limit=2.0,
@@ -47,8 +47,6 @@
                  ):
         """
         Args:
-            prot_attr: String or array-like column indices or column names
-                of protected attributes.
             estimator: An estimator implementing methods ``fit(X, y,
                 sample_weight)`` and ``predict(X)``, where ``X`` is the matrix
                 of features, ``y`` is the vector of labels, and
@@ -62,6 +60,11 @@
                 `self.model.moments`. Otherwise, provide the desired
                 :class:`~fairlearn.reductions.Moment` object defining the
                 disparity constraints.
+            prot_attr (single label or list-like, optional): Protected
+                attribute(s) to use in the grid search. If more than one
+                attribute, all combinations of values (intersections) are
+                considered. Default is ``None`` meaning all protected attributes
+                from the dataset are used.
             constraint_weight: When the ``selection_rule`` is
                 "tradeoff_optimization" (default, no other option currently)
                 this float specifies the relative weight put on the constraint
@@ -84,51 +87,17 @@
             max_val: Loss function parameter for "Square" and "Absolute,"
                 typically the maximum of the range of y values.
         """
-        self.prot_attr = prot_attr
-        self.moments = {
-            "DemographicParity": red.DemographicParity,
-            "EqualizedOdds": red.EqualizedOdds,
-            "TruePositiveRateDifference": red.TruePositiveRateDifference,
-            "ErrorRateRatio": red.ErrorRateRatio,
-            "GroupLoss": red.GroupLossMoment
-        }
-
-        if isinstance(constraints, str):
-            if constraints not in self.moments:
-                raise ValueError(f"Constraint not recognized: {constraints}")
-
-            if constraints == "GroupLoss":
-                losses = {
-                    "ZeroOne": red.ZeroOneLoss,
-                    "Square": red.SquareLoss,
-                    "Absolute": red.AbsoluteLoss
-                }
-
-                if loss == "ZeroOne":
-                    self.loss = losses[loss]()
-                else:
-                    self.loss = losses[loss](min_val, max_val)
-
-                self.moment = self.moments[constraints](loss=self.loss)
-            else:
-                self.moment = self.moments[constraints]()
-        elif isinstance(constraints, red.Moment):
-            self.moment = constraints
-        else:
-            raise ValueError("constraints must be a string or Moment object.")
-
         self.estimator = estimator
         self.constraints = constraints
+        self.prot_attr = prot_attr
         self.constraint_weight = constraint_weight
         self.grid_size = grid_size
         self.grid_limit = grid_limit
         self.grid = grid
         self.drop_prot_attr = drop_prot_attr
-
-        self.model = red.GridSearch(estimator=self.estimator,
-                constraints=self.moment,
-                constraint_weight=self.constraint_weight,
-                grid_size=self.grid_size, grid_limit=self.grid_limit,
-                grid=self.grid)
+        self.loss = loss
+        self.min_val = min_val
+        self.max_val = max_val
 
     def fit(self, X, y):
         """Train a less biased classifier or regressor with the given training
@@ -141,12 +110,51 @@ def fit(self, X, y):
         Returns:
             self
         """
-        A = X[self.prot_attr]
+        X, y, _ = check_inputs(X, y)
+        _, self.prot_attr_ = check_groups(X, self.prot_attr)
+        self.estimator_ = clone(self.estimator)
+
+        moments = {
+            "DemographicParity": red.DemographicParity,
+            "EqualizedOdds": red.EqualizedOdds,
+            "TruePositiveRateDifference": red.TruePositiveRateDifference,
+            "ErrorRateRatio": red.ErrorRateRatio,
+            "GroupLoss": red.GroupLossMoment
+        }
+        if isinstance(self.constraints, str):
+            if self.constraints not in moments:
+                raise ValueError(f"Constraint not recognized: {self.constraints}")
+            if self.constraints == "GroupLoss":
+                losses = {
+                    "ZeroOne": red.ZeroOneLoss,
+                    "Square": red.SquareLoss,
+                    "Absolute": red.AbsoluteLoss
+                }
+                if self.loss == "ZeroOne":
+                    self.loss_ = losses[self.loss]()
+                else:
+                    self.loss_ = losses[self.loss](self.min_val, self.max_val)
+
+                self.moment_ = moments[self.constraints](loss=self.loss_)
+            else:
+                self.moment_ = moments[self.constraints]()
+        elif isinstance(self.constraints, red.Moment):
+            self.moment_ = self.constraints
+        else:
+            raise ValueError("constraints must be a string or Moment object.")
+
+        self.model_ = red.GridSearch(estimator=self.estimator_,
+                constraints=self.moment_,
+                constraint_weight=self.constraint_weight,
+                grid_size=self.grid_size, grid_limit=self.grid_limit,
+                grid=self.grid)
+
+        A = X[self.prot_attr_]
 
         if self.drop_prot_attr:
-            X = X.drop(self.prot_attr, axis=1)
+            X = X.drop(self.prot_attr_, axis=1)
 
-        self.model.fit(X, y, sensitive_features=A)
+        self.model_.fit(X, y, sensitive_features=A)
 
         return self

@@ -160,9 +168,9 @@ def predict(self, X):
             numpy.ndarray: Predicted output per sample.
         """
         if self.drop_prot_attr:
-            X = X.drop(self.prot_attr, axis=1)
+            X = X.drop(self.prot_attr_, axis=1)
 
-        return self.model.predict(X)
+        return self.model_.predict(X)
 
 
     def predict_proba(self, X):
@@ -180,10 +188,10 @@
             ``self.classes_``.
         """
         if self.drop_prot_attr:
-            X = X.drop(self.prot_attr)
+            X = X.drop(self.prot_attr_)
 
-        if isinstance(self.model.constraints, red.ClassificationMoment):
-            return self.model.predict_proba(X)
+        if isinstance(self.model_.constraints, red.ClassificationMoment):
+            return self.model_.predict_proba(X)
 
         raise NotImplementedError("Underlying model does not support "
                                   "predict_proba")
