Remove redundant label from group-variable (#366)

* Remove redundant label from group-variable Fixes #364 * fix by * comment * fix * Update estimate_contrasts.R * fix, add tests * Update DESCRIPTION * Update DESCRIPTION * fix * Update format.R * Update format.R * Update get_marginalcontrasts.R * Update get_marginalcontrasts.R * Remove redundant label from group-variable Fixes #364 * Update format.R * fix * Update get_marginalcontrasts.R * Update get_marginalcontrasts.R * fix for contrasting slopes * prepare * ups * fix * update tests * Update test-estimate_contrasts.R * Update get_marginalcontrasts.R * Update get_marginalcontrasts.R * update snapshots * Update ordinal.md * Update test-estimate_means.R * examples * Update get_marginalcontrasts.R * Update test-estimate_contrasts.R
easystats · Jan 27, 2025 · a6a70d8 · a6a70d8
1 parent 395e1b8
commit a6a70d8
Show file tree

Hide file tree

Showing 15 changed files with 331 additions and 303 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: modelbased
 Title: Estimation of Model-Based Predictions, Contrasts and Means
-Version: 0.8.9.103
+Version: 0.8.9.104
 Authors@R:
     c(person(given = "Dominique",
              family = "Makowski",
@@ -89,3 +89,4 @@ Roxygen: list(markdown = TRUE)
 Config/Needs/check: stan-dev/cmdstanr
 Config/Needs/website: easystats/easystatstemplate
 LazyData: true
+Remotes: vincentarelbundock/marginaleffects
diff --git a/R/estimate_contrasts.R b/R/estimate_contrasts.R
@@ -79,11 +79,6 @@
 #' data$cyl <- as.factor(data$cyl)
 #' data$am <- as.factor(data$am)
 #'
-#' model <- rstanarm::stan_glm(mpg ~ cyl * am, data = data, refresh = 0)
-#' estimate_contrasts(model)
-#' # fix `am` at value 1
-#' estimate_contrasts(model, contrast = "cyl", by = "am='1'")
-#'
 #' model <- rstanarm::stan_glm(mpg ~ cyl * wt, data = data, refresh = 0)
 #' estimate_contrasts(model)
 #' estimate_contrasts(model, by = "wt", length = 4)
@@ -116,11 +111,6 @@ estimate_contrasts <- function(model,
     predict <- transform
   }
 
-  # update comparison argument - if user provides a formula for the new
-  # marginaleffects version, we still want the string-option for internal
-  # processing...
-  comparison <- .get_marginaleffects_hypothesis_argument(comparison)$comparison
-
   if (backend == "emmeans") {
     # Emmeans ------------------------------------------------------------------
     estimated <- get_emcontrasts(model,

diff --git a/R/format.R b/R/format.R
@@ -13,7 +13,7 @@ format.estimate_contrasts <- function(x, format = NULL, ...) {
   by <- rev(attr(x, "focal_terms", exact = TRUE))
   # add "Level" columns from contrasts
   if (all(c("Level1", "Level2") %in% colnames(x))) {
-    by <- unique(c("Level1", "Level2", by))
+    by <- unique(by, c("Level1", "Level2"))
   }
   # check which columns actually exist
   if (!is.null(by)) {
@@ -172,30 +172,39 @@ format.marginaleffects_contrasts <- function(x, model = NULL, p_adjust = NULL, c
     }
   }
 
-  # only when we have a comparison based on these options from marginaleffects,
-  # we want to "clean" the parameter names
-  valid_options <- .valid_hypothesis_strings()
+  # check type of contrast
+  is_ratio_comparison <- inherits(comparison, "formula") && identical(deparse(comparison[[2]]), "ratio")
 
-  # Column name for coefficient - fix needed for contrasting slopes
+  # Column name for coefficient - fix needed for contrasting slopes and ratios
   colnames(x)[colnames(x) == "Slope"] <- "Difference"
-
-  ## TODO: we should be able to process more ways of comparisons here,
-  ## e.g. also prettify labels and prepare levels for certain formula-written
-  ## comparisons. Need to find out which ones.
+  if (is_ratio_comparison) {
+    colnames(x)[colnames(x) == "Difference"] <- "Ratio"
+  }
 
   # for contrasting slopes, we do nothing more here. for other contrasts,
   # we prettify labels now
-  if (!is.null(comparison) && is.character(comparison) && comparison %in% valid_options) {
+
+  if (!is.null(comparison)) {
     #  the goal here is to create tidy columns with the comparisons.
     # marginaleffects returns a single column that contains all levels that
     # are contrasted. We want to have the contrasted levels per predictor in
     # a separate column. This is what we do here...
 
-    # split parameter column into comparison groups.
-    params <- as.data.frame(do.call(
-      rbind,
-      lapply(x$Parameter, .split_at_minus_outside_parentheses)
-    ))
+    if (is_ratio_comparison) {
+      params <- as.data.frame(do.call(
+        rbind,
+        lapply(x$Parameter, function(s) {
+          value_pairs <- insight::trim_ws(unlist(strsplit(s, "/", fixed = TRUE), use.names = FALSE))
+          gsub("(", "", gsub(")", "", value_pairs, fixed = TRUE), fixed = TRUE)
+        })
+      ))
+    } else {
+      # split parameter column into comparison groups.
+      params <- as.data.frame(do.call(
+        rbind,
+        lapply(x$Parameter, .split_at_minus_outside_parentheses)
+      ))
+    }
 
     # we *could* stop here and simply rename the split columns, but then
     # we cannot filter by `by` - thus, we go on, extract all single levels,
@@ -346,50 +355,16 @@ format.marginaleffects_contrasts <- function(x, model = NULL, p_adjust = NULL, c
       # }
       # ------------------------------------------------------------------
 
-      # filter by "by" variables
-      if (!is.null(by)) {
-        keep_rows <- seq_len(nrow(params))
-        for (i in by) {
-          by_names <- paste0(i, 1:2)
-          keep_rows <- keep_rows[apply(params[by_names], 1, function(j) {
-            all(j == j[1])
-          })]
-        }
-
-        # here we make sure that one of the "by" column has its original
-        # column name back, so we can properly merge all variables in
-        # "contrast" and "by" to the original data
-        by_columns <- paste0(by, 1)
-        params <- datawizard::data_rename(
-          params,
-          select = by_columns,
-          replacement = by,
-          verbose = FALSE
-        )
-
-        # filter original data and new params by "by"
-        x <- x[keep_rows, ]
-        params <- params[keep_rows, ]
-      }
-
       # remove old column
       x$Parameter <- NULL
 
       # add back new columns
-      x <- cbind(params[c(contrast, by)], x)
+      x <- cbind(params[contrast], x)
 
       # make sure terms are factors, for data_arrange later
       for (i in focal_terms) {
         x[[i]] <- factor(x[[i]], levels = unique(x[[i]]))
       }
-      # make sure filtering terms in `by` are factors, for data_arrange later
-      if (!is.null(by) && length(by)) {
-        for (i in by) {
-          if (i %in% colnames(dgrid) && i %in% colnames(x) && is.factor(dgrid[[i]]) && !is.factor(x[[i]])) { # nolint
-            x[[i]] <- factor(x[[i]], levels = unique(x[[i]]))
-          }
-        }
-      }
     }
   }
 

diff --git a/R/get_marginalcontrasts.R b/R/get_marginalcontrasts.R
@@ -13,6 +13,11 @@ get_marginalcontrasts <- function(model,
   # check if available
   insight::check_if_installed("marginaleffects")
 
+  # temporarily overwrite settings that error on "too many" rows
+  me_option <- getOption("marginaleffects_safe")
+  options(marginaleffects_safe = FALSE)
+  on.exit(options(marginaleffects_safe = me_option))
+
 
   # First step: prepare arguments ---------------------------------------------
   # ---------------------------------------------------------------------------
@@ -22,19 +27,17 @@ get_marginalcontrasts <- function(model,
     contrast <- "auto"
   }
 
+  # check whether contrasts should be made for numerics or categorical
+  model_data <- insight::get_data(model, source = "mf", verbose = FALSE)
+  on_the_fly_factors <- attributes(model_data)$factors
+
   # Guess arguments
   my_args <- .guess_marginaleffects_arguments(model, by, contrast, verbose = verbose, ...)
 
   # sanitize comparison argument, to ensure compatibility between different
   # marginaleffects versions - newer versions don't accept a string argument,
   # only formulas (older versions don't accept formulas)
-  hypothesis_arg <- .get_marginaleffects_hypothesis_argument(comparison, ...)
-  # update / reset argument
-  comparison <- hypothesis_arg$comparison
-
-  # check whether contrasts should be made for numerics or categorical
-  model_data <- insight::get_data(model, source = "mf", verbose = FALSE)
-  on_the_fly_factors <- attributes(model_data)$factors
+  my_args <- .get_marginaleffects_hypothesis_argument(comparison, my_args, model_data, ...)
 
   # extract first focal term
   first_focal <- my_args$contrast[1]
@@ -60,7 +63,7 @@ get_marginalcontrasts <- function(model,
       trend = my_args$contrast,
       by = my_args$by,
       ci = ci,
-      hypothesis = hypothesis_arg$hypothesis,
+      hypothesis = my_args$comparison_slopes,
       backend = "marginaleffects",
       verbose = verbose,
       ...
@@ -71,7 +74,7 @@ get_marginalcontrasts <- function(model,
       model = model,
       by = unique(c(my_args$contrast, my_args$by)),
       ci = ci,
-      hypothesis = hypothesis_arg$hypothesis,
+      hypothesis = my_args$comparison,
       predict = predict,
       backend = "marginaleffects",
       marginalize = marginalize,
@@ -95,7 +98,7 @@ get_marginalcontrasts <- function(model,
     info = list(
       contrast = my_args$contrast,
       predict = predict,
-      comparison = comparison,
+      comparison = my_args$comparison,
       marginalize = marginalize,
       p_adjust = p_adjust
     )
@@ -112,36 +115,83 @@ get_marginalcontrasts <- function(model,
 
 # make "comparison" argument compatible -----------------------------------
 
-.get_marginaleffects_hypothesis_argument <- function(comparison, ...) {
-  # save original argument
-  hypothesis <- comparison
-  # check if we have such a string
+.get_marginaleffects_hypothesis_argument <- function(comparison, my_args, model_data = NULL, ...) {
+  # init
+  comparison_slopes <- NULL
+  original_by <- my_args$by
+
+  # make sure "by" is a valid column name, and no filter-directive, like "Species='setosa'".
+  if (!is.null(my_args$by) && any(grepl("[^0-9A-Za-z\\.]", my_args$by))) {
+    my_args$by <- NULL
+  }
+
+  # convert comparison and by into a formula
   if (!is.null(comparison)) {
-    if (is.character(comparison) &&
-      comparison %in% .valid_hypothesis_strings() &&
-      isTRUE(insight::check_if_installed("marginaleffects", quietly = TRUE)) &&
-      utils::packageVersion("marginaleffects") > "0.24.0") {
-      # convert to formula
-      hypothesis <- stats::as.formula(paste("~", comparison))
-    } else if (inherits(comparison, "formula")) {
-      # convert to character
-      comparison_string <- all.vars(comparison)
-      # update comparison
-      if (length(comparison_string) == 1 && comparison_string %in% .valid_hypothesis_strings()) {
-        comparison <- comparison_string
+    # only proceed if we don't have custom comparisons
+    if (!.is_custom_comparison(comparison)) {
+      # if we have a formula as comparison, we convert it into strings in order to
+      # extract the information for "comparison" and "by", as we need for processing
+      # in modelbased.
+      if (inherits(comparison, "formula")) {
+        # check if we have grouping in the formula, indicated via "|". we split
+        # the formula into the three single components: lhs ~ rhs | group
+        f <- insight::trim_ws(unlist(strsplit(insight::safe_deparse(comparison), "[~|]")))
+        # extract formula parts
+        formula_lhs <- f[1]
+        formula_rhs <- f[2]
+        formula_group <- f[3]
+        # can be NA when no group
+        if (is.na(formula_group) || !nzchar(formula_group)) {
+          # no grouping via formula
+          formula_group <- NULL
+        } else {
+          # else, if we have groups, update by-argument
+          my_args$by <- formula_group
+        }
+      } else {
+        # sanity check for "comparison" argument
+        insight::validate_argument(comparison, .valid_hypothesis_strings())
+        formula_lhs <- "difference"
+        formula_rhs <- comparison
       }
+      # we put "by" into the formula. user either provided "by", or we put the
+      # group variable from the formula into "by", hence, "my_args$by" definitely
+      # contains the requested groups
+      formula_group <- my_args$by
+      # compose formula
+      f <- paste(formula_lhs, "~", paste(formula_rhs, collapse = "+"))
+      # for contrasts of slopes, we don *not* want the group-variable in the formula
+      comparison_slopes <- stats::as.formula(f)
+      # add group variable and update by
+      if (!is.null(formula_group)) {
+        f <- paste(f, "|", paste(formula_group, collapse = "+"))
+        my_args$by <- formula_group
+      }
+      comparison <- stats::as.formula(f)
     }
+  } else {
+    # default to pairwise
+    comparison <- comparison_slopes <- ~pairwise
   }
-  # we want: "hypothesis" is the original argument provided by the user,
-  # can be a formula like ~pairwise, or a string like "pairwise". This is
-  # converted into the appropriate type depending on the marginaleffects
-  # version. "comparison" should always be a character string, for internal
-  # processing.
-  list(hypothesis = hypothesis, comparison = comparison)
+  # remove "by" from "contrast"
+  my_args$contrast <- setdiff(my_args$contrast, my_args$by)
+
+  c(
+    # the "my_args" argument, containing "by" and "contrast"
+    my_args,
+    list(
+      # the modifed comparison, as formula, which also includes "by" as group
+      comparison = comparison,
+      # the modifed comparison, as formula, excluding "by" as group
+      comparison_slopes = comparison_slopes,
+      # the original "by" value, might be required for filtering
+      # (e.g. when `by = "Species='setosa'"`)
+      original_by = original_by
+    )
+  )
 }
 
 
-# these are the string values that need to be converted to formulas
 .valid_hypothesis_strings <- function() {
   c(
     "pairwise", "reference", "sequential", "meandev", "meanotherdev",

diff --git a/R/get_marginaltrends.R b/R/get_marginaltrends.R
@@ -17,8 +17,7 @@ get_marginaltrends <- function(model,
   dots <- list(...)
 
   # Guess arguments
-  trend <- .guess_marginaltrends_arguments(model, trend, by, verbose, ...)
-
+  trend <- .guess_marginaltrends_arguments(model, trend, verbose, ...)
 
   # First step: create a data grid --------------------------------------------
   # ---------------------------------------------------------------------------
@@ -93,7 +92,6 @@ get_marginaltrends <- function(model,
 #' @keywords internal
 .guess_marginaltrends_arguments <- function(model,
                                             trend = NULL,
-                                            by = NULL,
                                             verbose = TRUE,
                                             ...) {
   # Gather info

diff --git a/R/standardize_methods.R b/R/standardize_methods.R
@@ -49,7 +49,7 @@ standardize.estimate_contrasts <- function(x, robust = FALSE, ...) {
     }
 
     # Standardize relevant cols
-    for (col in c("Difference", "Coefficient", "SE", "MAD", "CI_low", "CI_high")) {
+    for (col in c("Difference", "Ratio", "Coefficient", "SE", "MAD", "CI_low", "CI_high")) {
       if (col %in% names(x)) {
         x[col] <- x[[col]] / disp
       }

diff --git a/man/estimate_contrasts.Rd b/man/estimate_contrasts.Rd