mlr-org · be-marc · Aug 20, 2024 · Jul 3, 2024 · Jul 3, 2024 · Jul 22, 2024
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -22,6 +22,7 @@ Depends:
     R (>= 3.1.0)
 Imports:
     checkmate,
+    mlr3misc,
     PRROC
 Suggests:
     testthat (>= 3.0.0)

diff --git a/NAMESPACE b/NAMESPACE
@@ -30,6 +30,7 @@ export(mauc_au1p)
 export(mauc_au1u)
 export(mauc_aunp)
 export(mauc_aunu)
+export(mauc_mu)
 export(maxae)
 export(maxse)
 export(mbrier)

diff --git a/NEWS.md b/NEWS.md
@@ -2,11 +2,12 @@
 
 * Added new measure `linex` (Linear-Exponential Loss).
 * Added new measure `pinball` (Average Pinball Loss).
+* Added new measure `mauc_mu` (Mu AUC).
 
 # mlr3measures 0.6.0
 
 * Added binary classification measures `gmean` and `gpr`.
-* Added multiclass `mcc`.
+* Added new measure `mcc` (Matthews Correlation Coefficient).
 
 # mlr3measures 0.5.0
 

diff --git a/R/bibentries.R b/R/bibentries.R
@@ -183,6 +183,17 @@ bibentries = c( # nolint start
     year             = "2009",
     doi              = "10.1109/TKDE.2008.239"
   ),
+  kleinman_2019      = bibentry("incollection",
+    title            = "AUC mu: A Performance Metric for Multi-Class Machine Learning Models",
+    author           = "Ross Kleiman and David Page",
+    booktitle        = "Proceedings of the 36th International Conference on Machine Learning",
+    pages            = "3439--3447",
+    year             = "2019",
+    editor           = "Chaudhuri, Kamalika and Salakhutdinov, Ruslan",
+    volume           = "97",
+    series           = "Proceedings of Machine Learning Research",
+    publisher        = "PMLR"
+  ),
   varian_1975 = bibentry("incollection",
     title        = "A Bayesian Approach to Real Estate Assessment",
     author       = "Varian, Hal R.",

diff --git a/R/classif_auc.R b/R/classif_auc.R
@@ -20,15 +20,14 @@
 #'   while considering the a-priori distribution of the classes as suggested
 #'   in Ferri (2009). Note we deviate from the definition in
 #'   Ferri (2009) by a factor of `c`.
-#'   The person implementing this function and writing this very
-#'   documentation right now cautions against using this measure because it is
-#'   an imperfect generalization of AU1U.
+#' * *MU*: Multiclass AUC as defined in Kleiman and Page (2019).
+#'   This measure is an average of the pairwise AUCs between all classes.
 #'
 #' @templateVar mid mauc_aunu
 #' @template classif_template
 #'
 #' @references
-#' `r format_bib("fawcett_2001", "ferri_2009", "hand_2001")`
+#' `r format_bib("fawcett_2001", "ferri_2009", "hand_2001", "kleinman_2019")`
 #'
 #' @inheritParams classif_params
 #' @template classif_example
@@ -83,11 +82,57 @@ mauc_au1p = function(truth, prob, na_value = NaN, ...) {
   sum(c(m + t(m)) * c(weights)) / (2L * (nlevels(truth) - 1L))
 }
 
+#' @rdname mauc_aunu
+#' @export
+mauc_mu = function(truth, prob, na_value = NaN, ...) {
+  # Multiclass AUC mu: the mean of the pairwise AUCs over all unordered class pairs.
+  # truth: factor of true classes; prob: numeric matrix with one column per class.
+  # Returns `na_value` (with a warning) when some level of `truth` has no sample.
+  # `...` is accepted but not used.
+  assert_classif(truth, prob = prob)
+
+  lvls = levels(truth)
+  if (length(unique(truth)) != length(lvls)) {
+    warning("Measure is undefined if there isn't at least one sample per class.")
+    return(na_value)
+  }
+
+  # The partition matrix below is indexed in level order, so bring the columns
+  # of prob into that order when they are named by class (no-op otherwise).
+  if (all(lvls %in% colnames(prob))) {
+    prob = prob[, lvls, drop = FALSE]
+  }
+
+  # partition matrix: 0 on the diagonal, 1 everywhere else
+  n_classes = length(lvls)
+  a = matrix(1, n_classes, n_classes) - diag(n_classes)
+  rownames(a) = lvls
+
+  # iterate over all pairwise combinations of classes
+  aucs = mlr3misc::map_dbl(combn(lvls, 2L, simplify = FALSE), function(pair) {
+    # rows of the paired classes: pair[1] acts as negative, pair[2] as positive
+    preds_neg = prob[truth == pair[1L], , drop = FALSE]
+    preds_pos = prob[truth == pair[2L], , drop = FALSE]
+    n_neg = nrow(preds_neg)
+    n_pos = nrow(preds_pos)
+
+    # project onto the difference of the two partition rows (here: p_pos - p_neg)
+    scores = rbind(preds_neg, preds_pos) %*% (a[pair[1L], ] - a[pair[2L], ])
+
+    # rank-based (Mann-Whitney) binary AUC; positives occupy the last n_pos rows
+    r = rank(scores, ties.method = "average")
+    (mean(r[n_neg + seq_len(n_pos)]) - (n_pos + 1) / 2) / n_neg
+  })
+
+  mean(aucs)
+}
+
 #' @include measures.R
 add_measure(mauc_aunu, "Average 1 vs. rest multiclass AUC", "classif", 0, 1, FALSE)
 add_measure(mauc_aunp, "Weighted average 1 vs. rest multiclass AUC", "classif", 0, 1, FALSE)
 add_measure(mauc_au1u, "Average 1 vs. 1 multiclass AUC", "classif", 0, 1, FALSE)
 add_measure(mauc_au1p, "Weighted average 1 vs. 1 multiclass AUC", "classif", 0, 1, FALSE)
+add_measure(mauc_mu, "Multiclass mu AUC", "classif", 0, 1, FALSE)
 
 # returns a numeric length nlevel(truth), with one-vs-rest AUC
 onevrestauc = function(prob, truth) {

diff --git a/man/mauc_aunu.Rd b/man/mauc_aunu.Rd
diff --git a/man/measures.Rd b/man/measures.Rd
diff --git a/tests/testthat/test_classif.R b/tests/testthat/test_classif.R
@@ -148,6 +148,7 @@ test_that("multiclass auc", {
   expect_equal(mauc_aunp(truth, prob), 1)
   expect_equal(mauc_au1u(truth, prob), 1)
   expect_equal(mauc_au1p(truth, prob), 1)
+  expect_equal(mauc_mu(truth, prob), 1)
 
   auc(truth = factor(c("a", "nota", "nota")), prob = c(1, 0, 0), positive = "a")
 
@@ -173,13 +174,14 @@ test_that("multiclass auc", {
   expect_equal(mauc_aunp(equalizer_truth, maxent_prob), 0.5)
   expect_equal(mauc_aunu(equalizer_truth, maxent_prob), 0.5)
   expect_equal(mauc_au1u(equalizer_truth, maxent_prob), 0.5)
+  expect_equal(mauc_mu(equalizer_truth, maxent_prob), 0.5)
 
   # reversing prob gives 1 - auc
   expect_equal(mauc_aunu(truth, prob), 1 - mauc_aunu(truth, 1 - prob))
   expect_equal(mauc_aunp(truth, prob), 1 - mauc_aunp(truth, 1 - prob))
   expect_equal(mauc_au1u(truth, prob), 1 - mauc_au1u(truth, 1 - prob))
   expect_equal(mauc_au1p(truth, prob), 1 - mauc_au1p(truth, 1 - prob))
-
+  expect_equal(mauc_mu(truth, prob), 1 - mauc_mu(truth, 1 - prob))
 
   # manually calculate au1u, au1p
   compmat = sapply(levels(truth), function(t1) {
@@ -201,5 +203,4 @@ test_that("multiclass auc", {
 
   expect_equal(mauc_aunu(truth, prob), mean(compvec))
   expect_equal(mauc_aunp(truth, prob), sum(compvec * table(truth) / length(truth)))
-
 })