move logic from __init__ to allow clone
Signed-off-by: Samuel Hoffman <hoffman.sc@gmail.com>
hoffmansc committed Jul 14, 2022
1 parent 17f0aba commit b198f86
Showing 2 changed files with 99 additions and 86 deletions.
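
Editor's note: this commit follows the scikit-learn estimator contract, under which `__init__` stores its arguments verbatim and all validation and derived objects are built in `fit`, with fitted attributes given a trailing underscore. `sklearn.base.clone` and `set_params` rely on that contract, since they rebuild or mutate an estimator purely through its constructor parameters. A minimal sketch of the failure mode being fixed (the classes below are illustrative, not part of aif360):

from sklearn.base import BaseEstimator, clone


class EagerReduction(BaseEstimator):
    """Builds derived state in __init__, the pattern this commit removes."""

    def __init__(self, constraints="DemographicParity"):
        self.constraints = constraints
        self.moment = constraints.upper()  # derived in __init__: goes stale


class LazyReduction(BaseEstimator):
    """Stores parameters verbatim; derives state in fit, as the commit does."""

    def __init__(self, constraints="DemographicParity"):
        self.constraints = constraints  # store only, no validation or logic

    def fit(self, X, y):
        self.moment_ = self.constraints.upper()  # rebuilt on every fit
        return self


# set_params after clone silently leaves the eagerly built state stale:
eager = clone(EagerReduction()).set_params(constraints="EqualizedOdds")
print(eager.constraints, eager.moment)   # EqualizedOdds DEMOGRAPHICPARITY

lazy = clone(LazyReduction()).set_params(constraints="EqualizedOdds")
print(lazy.fit(None, None).moment_)      # EQUALIZEDODDS

The renaming of `self.model`, `self.moment`, and friends to `self.model_`, `self.moment_` in the diffs below is the same convention: anything derived from the parameters is fitted state, not configuration.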
71 changes: 38 additions & 33 deletions aif360/sklearn/inprocessing/exponentiated_gradient_reduction.py
@@ -5,11 +5,10 @@
 licensed under the MIT License, Copyright Microsoft Corporation
 """
 import fairlearn.reductions as red
-import numpy as np
-from sklearn.base import BaseEstimator, ClassifierMixin
+from sklearn.base import BaseEstimator, ClassifierMixin, clone
 from sklearn.preprocessing import LabelEncoder
 
-from aif360.sklearn.utils import check_inputs
+from aif360.sklearn.utils import check_inputs, check_groups
 
 
 class ExponentiatedGradientReduction(BaseEstimator, ClassifierMixin):
@@ -27,18 +26,16 @@ class ExponentiatedGradientReduction(BaseEstimator, ClassifierMixin):
         <https://arxiv.org/abs/1803.02453>`_
     """
     def __init__(self,
-                 prot_attr,
                  estimator,
                  constraints,
+                 prot_attr=None,
                  eps=0.01,
                  T=50,
                  nu=None,
                  eta_mul=2.0,
                  drop_prot_attr=True):
         """
         Args:
-            prot_attr: String or array-like column indices or column names of
-                protected attributes.
             estimator: An estimator implementing methods ``fit(X, y,
                 sample_weight)`` and ``predict(X)``, where ``X`` is the matrix
                 of features, ``y`` is the vector of labels, and
@@ -52,6 +49,11 @@ def __init__(self,
                 `self.model.moments`. Otherwise, provide the desired
                 :class:`~fairlearn.reductions.Moment` object defining the
                 disparity constraints.
+            prot_attr (single label or list-like, optional): Protected
+                attribute(s) to use in the reduction process. If more than one
+                attribute, all combinations of values (intersections) are
+                considered. Default is ``None`` meaning all protected attributes
+                from the dataset are used.
             eps: Allowed fairness constraint violation; the solution is
                 guaranteed to have the error within ``2*best_gap`` of the best
                 error under constraint eps; the constraint violation is at most
@@ -64,34 +66,15 @@
             drop_prot_attr: Boolean flag indicating whether to drop protected
                 attributes from training data.
         """
-        self.prot_attr = prot_attr
-        self.moments = {
-            "DemographicParity": red.DemographicParity,
-            "EqualizedOdds": red.EqualizedOdds,
-            "TruePositiveRateDifference": red.TruePositiveRateDifference,
-            "ErrorRateRatio": red.ErrorRateRatio
-        }
-
-        if isinstance(constraints, str):
-            if constraints not in self.moments:
-                raise ValueError(f"Constraint not recognized: {constraints}")
-
-            self.moment = self.moments[constraints]()
-        elif isinstance(constraints, red.Moment):
-            self.moment = constraints
-        else:
-            raise ValueError("constraints must be a string or Moment object.")
-
         self.estimator = estimator
         self.constraints = constraints
+        self.prot_attr = prot_attr
         self.eps = eps
         self.T = T
         self.nu = nu
         self.eta_mul = eta_mul
         self.drop_prot_attr = drop_prot_attr
 
-        self.model = red.ExponentiatedGradient(self.estimator, self.moment,
-            self.eps, self.T, self.nu, self.eta_mul)
-
     def fit(self, X, y):
         """Learns randomized model with less bias
@@ -102,16 +85,38 @@ def fit(self, X, y):
         Returns:
             self
         """
-        A = X[self.prot_attr]
+        X, y, _ = check_inputs(X, y)
+        _, self.prot_attr_ = check_groups(X, self.prot_attr)
+        self.estimator_ = clone(self.estimator)
+
+        moments = {
+            "DemographicParity": red.DemographicParity,
+            "EqualizedOdds": red.EqualizedOdds,
+            "TruePositiveRateDifference": red.TruePositiveRateDifference,
+            "ErrorRateRatio": red.ErrorRateRatio
+        }
+        if isinstance(self.constraints, str):
+            if self.constraints not in moments:
+                raise ValueError(f"Constraint not recognized: {self.constraints}")
+            self.moment_ = moments[self.constraints]()
+        elif isinstance(self.constraints, red.Moment):
+            self.moment_ = self.constraints
+        else:
+            raise ValueError("constraints must be a string or Moment object.")
+
+        self.model_ = red.ExponentiatedGradient(self.estimator_, self.moment_,
+            eps=self.eps, T=self.T, nu=self.nu, eta_mul=self.eta_mul)
+
+        A = X[self.prot_attr_]
 
         if self.drop_prot_attr:
-            X = X.drop(self.prot_attr, axis=1)
+            X = X.drop(self.prot_attr_, axis=1)
 
         le = LabelEncoder()
         y = le.fit_transform(y)
         self.classes_ = le.classes_
 
-        self.model.fit(X, y, sensitive_features=A)
+        self.model_.fit(X, y, sensitive_features=A)
 
         return self

@@ -124,9 +129,9 @@ def predict(self, X):
             numpy.ndarray: Predicted class label per sample.
         """
         if self.drop_prot_attr:
-            X = X.drop(self.prot_attr, axis=1)
+            X = X.drop(self.prot_attr_, axis=1)
 
-        return self.classes_[self.model.predict(X)]
+        return self.classes_[self.model_.predict(X)]
 
 
     def predict_proba(self, X):
@@ -144,6 +149,6 @@ def predict_proba(self, X):
             ``self.classes_``.
         """
         if self.drop_prot_attr:
-            X = X.drop(self.prot_attr, axis=1)
+            X = X.drop(self.prot_attr_, axis=1)
 
         return self.model._pmf_predict(X)
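
Editor's note: with construction deferred to `fit`, the reduction now survives the clone/set_params round trip that meta-estimators such as `GridSearchCV` perform internally. A hedged usage sketch (the `LogisticRegression` base estimator and the "sex" protected attribute are assumed examples, not part of the commit):

from sklearn.base import clone
from sklearn.linear_model import LogisticRegression

from aif360.sklearn.inprocessing import ExponentiatedGradientReduction

egr = ExponentiatedGradientReduction(estimator=LogisticRegression(),
                                     constraints="EqualizedOdds",
                                     prot_attr="sex")

# Cloning re-runs __init__ with get_params(); since __init__ now only
# stores its arguments, the copy is faithful and independent.
egr_copy = clone(egr).set_params(constraints="DemographicParity")
assert egr.constraints == "EqualizedOdds"
assert egr_copy.constraints == "DemographicParity"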
114 changes: 61 additions & 53 deletions aif360/sklearn/inprocessing/grid_search_reduction.py
@@ -5,9 +5,9 @@
 licensed under the MIT License, Copyright Microsoft Corporation
 """
 import fairlearn.reductions as red
-import numpy as np
-from sklearn.base import BaseEstimator, ClassifierMixin
-from sklearn.preprocessing import LabelEncoder
+from sklearn.base import BaseEstimator, ClassifierMixin, clone
+
+from aif360.sklearn.utils import check_inputs, check_groups
 
 
 class GridSearchReduction(BaseEstimator, ClassifierMixin):
@@ -33,9 +33,9 @@ class GridSearchReduction(BaseEstimator, ClassifierMixin):
         <https://arxiv.org/abs/1905.12843>`_
     """
     def __init__(self,
-                 prot_attr,
                  estimator,
                  constraints,
+                 prot_attr=None,
                  constraint_weight=0.5,
                  grid_size=10,
                  grid_limit=2.0,
@@ -47,8 +47,6 @@
                  ):
         """
         Args:
-            prot_attr: String or array-like column indices or column names
-                of protected attributes.
             estimator: An estimator implementing methods ``fit(X, y,
                 sample_weight)`` and ``predict(X)``, where ``X`` is the matrix
                 of features, ``y`` is the vector of labels, and
@@ -62,6 +60,11 @@
                 `self.model.moments`. Otherwise, provide the desired
                 :class:`~fairlearn.reductions.Moment` object defining the
                 disparity constraints.
+            prot_attr (single label or list-like, optional): Protected
+                attribute(s) to use in the grid search. If more than one
+                attribute, all combinations of values (intersections) are
+                considered. Default is ``None`` meaning all protected attributes
+                from the dataset are used.
             constraint_weight: When the ``selection_rule`` is
                 "tradeoff_optimization" (default, no other option currently)
                 this float specifies the relative weight put on the constraint
@@ -84,51 +87,17 @@
             max_val: Loss function parameter for "Square" and "Absolute,"
                 typically the maximum of the range of y values.
         """
-        self.prot_attr = prot_attr
-        self.moments = {
-            "DemographicParity": red.DemographicParity,
-            "EqualizedOdds": red.EqualizedOdds,
-            "TruePositiveRateDifference": red.TruePositiveRateDifference,
-            "ErrorRateRatio": red.ErrorRateRatio,
-            "GroupLoss": red.GroupLossMoment
-        }
-
-        if isinstance(constraints, str):
-            if constraints not in self.moments:
-                raise ValueError(f"Constraint not recognized: {constraints}")
-
-            if constraints == "GroupLoss":
-                losses = {
-                    "ZeroOne": red.ZeroOneLoss,
-                    "Square": red.SquareLoss,
-                    "Absolute": red.AbsoluteLoss
-                }
-
-                if loss == "ZeroOne":
-                    self.loss = losses[loss]()
-                else:
-                    self.loss = losses[loss](min_val, max_val)
-
-                self.moment = self.moments[constraints](loss=self.loss)
-            else:
-                self.moment = self.moments[constraints]()
-        elif isinstance(constraints, red.Moment):
-            self.moment = constraints
-        else:
-            raise ValueError("constraints must be a string or Moment object.")
-
         self.estimator = estimator
         self.constraints = constraints
+        self.prot_attr = prot_attr
         self.constraint_weight = constraint_weight
         self.grid_size = grid_size
         self.grid_limit = grid_limit
         self.grid = grid
         self.drop_prot_attr = drop_prot_attr
-
-        self.model = red.GridSearch(estimator=self.estimator,
-                constraints=self.moment,
-                constraint_weight=self.constraint_weight,
-                grid_size=self.grid_size, grid_limit=self.grid_limit,
-                grid=self.grid)
+        self.loss = loss
+        self.min_val = min_val
+        self.max_val = max_val
 
     def fit(self, X, y):
         """Train a less biased classifier or regressor with the given training
@@ -141,12 +110,51 @@ def fit(self, X, y):
         Returns:
             self
         """
-        A = X[self.prot_attr]
+        X, y, _ = check_inputs(X, y)
+        _, self.prot_attr_ = check_groups(X, self.prot_attr)
+        self.estimator_ = clone(self.estimator)
+
+        moments = {
+            "DemographicParity": red.DemographicParity,
+            "EqualizedOdds": red.EqualizedOdds,
+            "TruePositiveRateDifference": red.TruePositiveRateDifference,
+            "ErrorRateRatio": red.ErrorRateRatio,
+            "GroupLoss": red.GroupLossMoment
+        }
+        if isinstance(self.constraints, str):
+            if self.constraints not in moments:
+                raise ValueError(f"Constraint not recognized: {self.constraints}")
+            if self.constraints == "GroupLoss":
+                losses = {
+                    "ZeroOne": red.ZeroOneLoss,
+                    "Square": red.SquareLoss,
+                    "Absolute": red.AbsoluteLoss
+                }
+                if self.loss == "ZeroOne":
+                    self.loss_ = losses[self.loss]()
+                else:
+                    self.loss_ = losses[self.loss](self.min_val, self.max_val)
+
+                self.moment_ = moments[self.constraints](loss=self.loss_)
+            else:
+                self.moment_ = moments[self.constraints]()
+        elif isinstance(self.constraints, red.Moment):
+            self.moment_ = self.constraints
+        else:
+            raise ValueError("constraints must be a string or Moment object.")
+
+        self.model_ = red.GridSearch(estimator=self.estimator_,
+                constraints=self.moment_,
+                constraint_weight=self.constraint_weight,
+                grid_size=self.grid_size, grid_limit=self.grid_limit,
+                grid=self.grid)
+
+        A = X[self.prot_attr_]
 
         if self.drop_prot_attr:
-            X = X.drop(self.prot_attr, axis=1)
+            X = X.drop(self.prot_attr_, axis=1)
 
-        self.model.fit(X, y, sensitive_features=A)
+        self.model_.fit(X, y, sensitive_features=A)
 
         return self

@@ -160,9 +168,9 @@ def predict(self, X):
             numpy.ndarray: Predicted output per sample.
         """
         if self.drop_prot_attr:
-            X = X.drop(self.prot_attr, axis=1)
+            X = X.drop(self.prot_attr_, axis=1)
 
-        return self.model.predict(X)
+        return self.model_.predict(X)
 
 
     def predict_proba(self, X):
@@ -180,10 +188,10 @@
             ``self.classes_``.
         """
         if self.drop_prot_attr:
-            X = X.drop(self.prot_attr)
+            X = X.drop(self.prot_attr_)
 
-        if isinstance(self.model.constraints, red.ClassificationMoment):
-            return self.model.predict_proba(X)
+        if isinstance(self.model_.constraints, red.ClassificationMoment):
+            return self.model_.predict_proba(X)
 
         raise NotImplementedError("Underlying model does not support "
                                   "predict_proba")
