Skip to content

Commit

Permalink
XGBoost autologging: support per-class importance plots (mlflow#4523)
Browse files Browse the repository at this point in the history
* Impl + test

Signed-off-by: dbczumar <corey.zumar@databricks.com>

* Format

Signed-off-by: dbczumar <corey.zumar@databricks.com>

* Fix offsets

Signed-off-by: dbczumar <corey.zumar@databricks.com>

* Uncomment

Signed-off-by: dbczumar <corey.zumar@databricks.com>

* Spacing fix

Signed-off-by: dbczumar <corey.zumar@databricks.com>

* Address review comments

Signed-off-by: dbczumar <corey.zumar@databricks.com>
  • Loading branch information
dbczumar authored Jul 6, 2021
1 parent f4eae61 commit e0e7181
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 7 deletions.
72 changes: 65 additions & 7 deletions mlflow/xgboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,26 +434,84 @@ def log_feature_importance_plot(features, importance, importance_type):
Log feature importance plot.
"""
import matplotlib.pyplot as plt
from cycler import cycler

features = np.array(features)
importance = np.array(importance)
indices = np.argsort(importance)
features = features[indices]
importance = importance[indices]

# Structure the supplied `importance` values as a `num_features`-by-`num_classes` matrix
importances_per_class_by_feature = np.array(importance)
if importances_per_class_by_feature.ndim <= 1:
# In this case, the supplied `importance` values are not given per class. Rather,
# one importance value is given per feature. For consistency with the assumed
# `num_features`-by-`num_classes` matrix structure, we coerce the importance
# values to a `num_features`-by-1 matrix
indices = np.argsort(importance)
# Sort features and importance values by magnitude during transformation to a
# `num_features`-by-`num_classes` matrix
features = features[indices]
importances_per_class_by_feature = np.array(
[[importance] for importance in importances_per_class_by_feature[indices]]
)
# In this case, do not include class labels on the feature importance plot because
# only one importance value has been provided per feature, rather than an
# one importance value for each class per feature
label_classes_on_plot = False
else:
importance_value_magnitudes = np.abs(importances_per_class_by_feature).sum(axis=1)
indices = np.argsort(importance_value_magnitudes)
features = features[indices]
importances_per_class_by_feature = importances_per_class_by_feature[indices]
label_classes_on_plot = True

num_classes = importances_per_class_by_feature.shape[1]
num_features = len(features)

# If num_features > 10, increase the figure height to prevent the plot
# from being too dense.
w, h = [6.4, 4.8] # matplotlib's default figure size
h = h + 0.1 * num_features if num_features > 10 else h
h = h + 0.1 * num_classes if num_classes > 1 else h
fig, ax = plt.subplots(figsize=(w, h))
# When importance values are provided for each class per feature, we want to ensure
# that the same color is used for all bars in the bar chart that have the same class
colors_to_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"][:num_classes]
color_cycler = cycler(color=colors_to_cycle)
ax.set_prop_cycle(color_cycler)

# The following logic operates on one feature at a time, adding a bar to the bar chart
# for each class that reflects the importance of the feature to predictions of that
# class
feature_ylocs = np.arange(num_features)
# Define offsets on the y-axis that are used to evenly space the bars for each class
# around the y-axis position of each feature
offsets_per_yloc = np.linspace(-0.5, 0.5, num_classes) / 2 if num_classes > 1 else [0]
for feature_idx, (feature_yloc, importances_per_class) in enumerate(
zip(feature_ylocs, importances_per_class_by_feature)
):
for class_idx, (offset, class_importance) in enumerate(
zip(offsets_per_yloc, importances_per_class)
):
(bar,) = ax.barh(
feature_yloc + offset,
class_importance,
align="center",
# Set the bar height such that importance value bars for a particular
# feature are spaced properly relative to each other (no overlap or gaps)
# and relative to importance value bars for other features
height=(0.5 / max(num_classes - 1, 1)),
)
if label_classes_on_plot and feature_idx == 0:
# Only set a label the first time a bar for a particular class is plotted to
# avoid duplicate legend entries. If we were to set a label for every bar,
# the legend would contain `num_features` labels for each class.
bar.set_label("Class {}".format(class_idx))

yloc = np.arange(num_features)
ax.barh(yloc, importance, align="center", height=0.5)
ax.set_yticks(yloc)
ax.set_yticks(feature_ylocs)
ax.set_yticklabels(features)
ax.set_xlabel("Importance")
ax.set_title("Feature Importance ({})".format(importance_type))
if label_classes_on_plot:
ax.legend()
fig.tight_layout()

tmpdir = tempfile.mkdtemp()
Expand Down
34 changes: 34 additions & 0 deletions tests/xgboost/test_xgboost_autolog.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,40 @@ def test_xgb_autolog_logs_specified_feature_importance(bst_params, dtrain):
assert loaded_imp == model.get_score(importance_type=imp_type)


@pytest.mark.large
@pytest.mark.skipif(
    Version(xgb.__version__) <= Version("1.4.2"),
    reason=(
        "In XGBoost <= 1.4.2, linear boosters do not support `get_score()` for importance value"
        " creation."
    ),
)
def test_xgb_autolog_logs_feature_importance_for_linear_boosters(dtrain):
    """Check that autologging records feature importance artifacts (PNG plot and JSON
    values) for a multiclass `gblinear` booster, and that the logged JSON matches the
    model's own `get_score()` output.
    """
    mlflow.xgboost.autolog()

    model = xgb.train(
        {"objective": "multi:softprob", "num_class": 3, "booster": "gblinear"}, dtrain
    )

    run = get_latest_run()
    # Strip the `file://` scheme so the artifact URI can be used as a local path
    artifacts_dir = run.info.artifact_uri.replace("file://", "")
    logged_artifacts = {
        artifact.path
        for artifact in mlflow.tracking.MlflowClient().list_artifacts(run.info.run_id)
    }

    importance_type = "weight"
    assert "feature_importance_{}.png".format(importance_type) in logged_artifacts

    json_name = "feature_importance_{}.json".format(importance_type)
    assert json_name in logged_artifacts

    # The persisted importance values must agree with what the model reports directly
    with open(os.path.join(artifacts_dir, json_name), "r") as f:
        loaded_imp = json.load(f)
    assert loaded_imp == model.get_score(importance_type=importance_type)


@pytest.mark.large
def test_no_figure_is_opened_after_logging(bst_params, dtrain):
mlflow.xgboost.autolog()
Expand Down

0 comments on commit e0e7181

Please sign in to comment.