add test, simplify code, add comments

easystats · Jan 28, 2025 · ea7fe83 · ea7fe83
1 parent a6a70d8
commit ea7fe83
Show file tree

Hide file tree

Showing 3 changed files with 62 additions and 44 deletions.
diff --git a/R/format.R b/R/format.R
@@ -165,6 +165,7 @@ format.marginaleffects_contrasts <- function(x, model = NULL, p_adjust = NULL, c
   # if we have `by = "name [fivenum]"`, we just want "name"
   for (i in focal_terms) {
     if (!is.null(by) && any(startsWith(by, i)) && !any(by %in% i)) {
+      # this line could be replaced by strsplit(by, "[^0-9A-Za-z\\.]")[[1]][1]
       by[startsWith(by, i)] <- i
     }
     if (!is.null(contrast) && any(startsWith(contrast, i)) && !any(contrast %in% i)) {
@@ -177,8 +178,12 @@ format.marginaleffects_contrasts <- function(x, model = NULL, p_adjust = NULL, c
 
   # Column name for coefficient - fix needed for contrasting slopes and ratios
   colnames(x)[colnames(x) == "Slope"] <- "Difference"
+  separator <- "-"
+
+  # for ratios, we want different column name, and we need to set the separator
   if (is_ratio_comparison) {
     colnames(x)[colnames(x) == "Difference"] <- "Ratio"
+    separator <- "/"
   }
 
   # for contrasting slopes, we do nothing more here. for other contrasts,
@@ -190,21 +195,10 @@ format.marginaleffects_contrasts <- function(x, model = NULL, p_adjust = NULL, c
     # are contrasted. We want to have the contrasted levels per predictor in
     # a separate column. This is what we do here...
 
-    if (is_ratio_comparison) {
-      params <- as.data.frame(do.call(
-        rbind,
-        lapply(x$Parameter, function(s) {
-          value_pairs <- insight::trim_ws(unlist(strsplit(s, "/", fixed = TRUE), use.names = FALSE))
-          gsub("(", "", gsub(")", "", value_pairs, fixed = TRUE), fixed = TRUE)
-        })
-      ))
-    } else {
-      # split parameter column into comparison groups.
-      params <- as.data.frame(do.call(
-        rbind,
-        lapply(x$Parameter, .split_at_minus_outside_parentheses)
-      ))
-    }
+    params <- as.data.frame(do.call(
+      rbind,
+      lapply(x$Parameter, .split_at_minus_outside_parentheses, separator = separator)
+    ))
 
     # we *could* stop here and simply rename the split columns, but then
     # we cannot filter by `by` - thus, we go on, extract all single levels,
@@ -574,40 +568,46 @@ format.marginaleffects_contrasts <- function(x, model = NULL, p_adjust = NULL, c
 # parentheses
 
 #' @keywords internal
-.split_at_minus_outside_parentheses <- function(input_string) {
-  pattern <- "\\(([^()]*)\\)|-" # find all the parentheses and the -
-  matches <- gregexpr(pattern, input_string, perl = TRUE)
-  match_positions <- matches[[1]]
-  match_lengths <- attr(matches[[1]], "match.length")
-
-  split_positions <- 0
-  for (i in seq_along(match_positions)) {
-    if (substring(input_string, match_positions[i], match_positions[i]) == "-") {
-      inside_parentheses <- FALSE
-      for (j in seq_along(match_positions)) {
-        if (i != j && match_positions[i] > match_positions[j] && match_positions[i] < (match_positions[j] + match_lengths[j])) {
-          inside_parentheses <- TRUE
-          break
+.split_at_minus_outside_parentheses <- function(input_string, separator = "-") {
+  # we split at "-" for differences, and at "/" for ratios
+  if (identical(separator, "/")) {
+    parts <- unlist(strsplit(input_string, "/", fixed = TRUE), use.names = FALSE)
+  } else {
+    pattern <- "\\(([^()]*)\\)|-" # find all the parentheses and the -
+    matches <- gregexpr(pattern, input_string, perl = TRUE)
+    match_positions <- matches[[1]]
+    match_lengths <- attr(matches[[1]], "match.length")
+
+    split_positions <- 0
+    for (i in seq_along(match_positions)) {
+      if (substring(input_string, match_positions[i], match_positions[i]) == "-") {
+        inside_parentheses <- FALSE
+        for (j in seq_along(match_positions)) {
+          if (i != j && match_positions[i] > match_positions[j] && match_positions[i] < (match_positions[j] + match_lengths[j])) {
+            inside_parentheses <- TRUE
+            break
+          }
+        }
+        if (!inside_parentheses) {
+          split_positions <- c(split_positions, match_positions[i])
         }
-      }
-      if (!inside_parentheses) {
-        split_positions <- c(split_positions, match_positions[i])
       }
     }
-  }
-  split_positions <- c(split_positions, nchar(input_string) + 1)
-
-  parts <- NULL
-  for (i in 1:(length(split_positions) - 1)) {
-    parts <- c(
-      parts,
-      substring(
-        input_string,
-        split_positions[i] + 1,
-        split_positions[i + 1] - 1
+    split_positions <- c(split_positions, nchar(input_string) + 1)
+
+    parts <- NULL
+    for (i in 1:(length(split_positions) - 1)) {
+      parts <- c(
+        parts,
+        substring(
+          input_string,
+          split_positions[i] + 1,
+          split_positions[i + 1] - 1
+        )
       )
-    )
+    }
   }
+
   parts <- insight::trim_ws(parts)
   gsub("(", "", gsub(")", "", parts, fixed = TRUE), fixed = TRUE)
 }
diff --git a/R/get_marginalcontrasts.R b/R/get_marginalcontrasts.R
@@ -122,6 +122,14 @@ get_marginalcontrasts <- function(model,
 
   # make sure "by" is a valid column name, and no filter-directive, like "Species='setosa'".
   if (!is.null(my_args$by) && any(grepl("[^0-9A-Za-z\\.]", my_args$by))) {
+
+    ## TODO: handle with by-filters
+    # for things like estimate_contrasts(model, "gear", by = "am='1'"), we can't
+    # use `by` as group in the formula, thus we remove by here - but it's still
+    # saved in `original_by`. We could use `original_by` either for creating a
+    # data grid, or for filtering. Currently, this is not supported for
+    # `estimate_contrasts()` (but for estimate_means()).
+
     my_args$by <- NULL
   }
 

diff --git a/tests/testthat/test-estimate_contrasts.R b/tests/testthat/test-estimate_contrasts.R
@@ -275,6 +275,15 @@ test_that("estimate_contrasts - p.adjust", {
 })
 
 
+test_that("estimate_contrasts - ratios", {
+  data(iris)
+  model <- lm(Petal.Width ~ Species, data = iris)
+  estim <- estimate_contrasts(model, "Species", comparison = ratio ~ pairwise, backend = "marginaleffects")
+  expect_equal(estim$Ratio, c(5.39024, 8.23577, 1.5279), tolerance = 1e-4)
+  expect_identical(dim(estim), c(3L, 9L))
+})
+
+
 test_that("estimate_contrasts - dfs", {
   skip_on_cran()
   skip_if_not_installed("lme4")
@@ -436,6 +445,7 @@ test_that("estimate_contrasts - different options for comparison", {
   expect_equal(out$Difference, c(0.35, -0.8, -0.35), tolerance = 1e-3)
 })
 
+
 skip_on_os(c("mac", "linux"))
 
 test_that("estimate_contrasts - filtering works", {