Skip to content

Commit

Permalink
Remove redundant label from group-variable (#366)
Browse files Browse the repository at this point in the history
* Remove redundant label from group-variable
Fixes #364

* fix by

* comment

* fix

* Update estimate_contrasts.R

* fix, add tests

* Update DESCRIPTION

* Update DESCRIPTION

* fix

* Update format.R

* Update format.R

* Update get_marginalcontrasts.R

* Update get_marginalcontrasts.R

* Remove redundant label from group-variable
Fixes #364

* Update format.R

* fix

* Update get_marginalcontrasts.R

* Update get_marginalcontrasts.R

* fix for contrasting slopes

* prepare

* ups

* fix

* update tests

* Update test-estimate_contrasts.R

* Update get_marginalcontrasts.R

* Update get_marginalcontrasts.R

* update snapshots

* Update ordinal.md

* Update test-estimate_means.R

* examples

* Update get_marginalcontrasts.R

* Update test-estimate_contrasts.R
  • Loading branch information
strengejacke authored Jan 27, 2025
1 parent 395e1b8 commit a6a70d8
Show file tree
Hide file tree
Showing 15 changed files with 331 additions and 303 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: modelbased
Title: Estimation of Model-Based Predictions, Contrasts and Means
Version: 0.8.9.103
Version: 0.8.9.104
Authors@R:
c(person(given = "Dominique",
family = "Makowski",
Expand Down Expand Up @@ -89,3 +89,4 @@ Roxygen: list(markdown = TRUE)
Config/Needs/check: stan-dev/cmdstanr
Config/Needs/website: easystats/easystatstemplate
LazyData: true
Remotes: vincentarelbundock/marginaleffects
10 changes: 0 additions & 10 deletions R/estimate_contrasts.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,6 @@
#' data$cyl <- as.factor(data$cyl)
#' data$am <- as.factor(data$am)
#'
#' model <- rstanarm::stan_glm(mpg ~ cyl * am, data = data, refresh = 0)
#' estimate_contrasts(model)
#' # fix `am` at value 1
#' estimate_contrasts(model, contrast = "cyl", by = "am='1'")
#'
#' model <- rstanarm::stan_glm(mpg ~ cyl * wt, data = data, refresh = 0)
#' estimate_contrasts(model)
#' estimate_contrasts(model, by = "wt", length = 4)
Expand Down Expand Up @@ -116,11 +111,6 @@ estimate_contrasts <- function(model,
predict <- transform
}

# update comparison argument - if user provides a formula for the new
# marginaleffects version, we still want the string-option for internal
# processing...
comparison <- .get_marginaleffects_hypothesis_argument(comparison)$comparison

if (backend == "emmeans") {
# Emmeans ------------------------------------------------------------------
estimated <- get_emcontrasts(model,
Expand Down
75 changes: 25 additions & 50 deletions R/format.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ format.estimate_contrasts <- function(x, format = NULL, ...) {
by <- rev(attr(x, "focal_terms", exact = TRUE))

Check warning on line 13 in R/format.R

View workflow job for this annotation

GitHub Actions / lint / lint

file=R/format.R,line=13,col=3,[object_overwrite_linter] 'by' is an exported object from package 'base'. Avoid re-using such symbols.
# add "Level" columns from contrasts
if (all(c("Level1", "Level2") %in% colnames(x))) {
by <- unique(c("Level1", "Level2", by))
by <- unique(by, c("Level1", "Level2"))

Check warning on line 16 in R/format.R

View workflow job for this annotation

GitHub Actions / lint / lint

file=R/format.R,line=16,col=5,[object_overwrite_linter] 'by' is an exported object from package 'base'. Avoid re-using such symbols.
}
# check which columns actually exist
if (!is.null(by)) {
Expand Down Expand Up @@ -172,30 +172,39 @@ format.marginaleffects_contrasts <- function(x, model = NULL, p_adjust = NULL, c
}
}

# only when we have a comparison based on these options from marginaleffects,
# we want to "clean" the parameter names
valid_options <- .valid_hypothesis_strings()
# check type of contrast
is_ratio_comparison <- inherits(comparison, "formula") && identical(deparse(comparison[[2]]), "ratio")

# Column name for coefficient - fix needed for contrasting slopes
# Column name for coefficient - fix needed for contrasting slopes and ratios
colnames(x)[colnames(x) == "Slope"] <- "Difference"

## TODO: we should be able to process more ways of comparisons here,
## e.g. also prettify labels and prepare levels for certain formula-written
## comparisons. Need to find out which ones.
if (is_ratio_comparison) {
colnames(x)[colnames(x) == "Difference"] <- "Ratio"
}

# for contrasting slopes, we do nothing more here. for other contrasts,
# we prettify labels now
if (!is.null(comparison) && is.character(comparison) && comparison %in% valid_options) {

if (!is.null(comparison)) {
# the goal here is to create tidy columns with the comparisons.
# marginaleffects returns a single column that contains all levels that
# are contrasted. We want to have the contrasted levels per predictor in
# a separate column. This is what we do here...

# split parameter column into comparison groups.
params <- as.data.frame(do.call(
rbind,
lapply(x$Parameter, .split_at_minus_outside_parentheses)
))
if (is_ratio_comparison) {
params <- as.data.frame(do.call(
rbind,
lapply(x$Parameter, function(s) {
value_pairs <- insight::trim_ws(unlist(strsplit(s, "/", fixed = TRUE), use.names = FALSE))
gsub("(", "", gsub(")", "", value_pairs, fixed = TRUE), fixed = TRUE)
})
))
} else {
# split parameter column into comparison groups.
params <- as.data.frame(do.call(
rbind,
lapply(x$Parameter, .split_at_minus_outside_parentheses)
))
}

# we *could* stop here and simply rename the split columns, but then
# we cannot filter by `by` - thus, we go on, extract all single levels,
Expand Down Expand Up @@ -346,50 +355,16 @@ format.marginaleffects_contrasts <- function(x, model = NULL, p_adjust = NULL, c
# }
# ------------------------------------------------------------------

# filter by "by" variables
if (!is.null(by)) {
keep_rows <- seq_len(nrow(params))
for (i in by) {
by_names <- paste0(i, 1:2)
keep_rows <- keep_rows[apply(params[by_names], 1, function(j) {
all(j == j[1])
})]
}

# here we make sure that one of the "by" column has its original
# column name back, so we can properly merge all variables in
# "contrast" and "by" to the original data
by_columns <- paste0(by, 1)
params <- datawizard::data_rename(
params,
select = by_columns,
replacement = by,
verbose = FALSE
)

# filter original data and new params by "by"
x <- x[keep_rows, ]
params <- params[keep_rows, ]
}

# remove old column
x$Parameter <- NULL

# add back new columns
x <- cbind(params[c(contrast, by)], x)
x <- cbind(params[contrast], x)

# make sure terms are factors, for data_arrange later
for (i in focal_terms) {
x[[i]] <- factor(x[[i]], levels = unique(x[[i]]))
}
# make sure filtering terms in `by` are factors, for data_arrange later
if (!is.null(by) && length(by)) {
for (i in by) {
if (i %in% colnames(dgrid) && i %in% colnames(x) && is.factor(dgrid[[i]]) && !is.factor(x[[i]])) { # nolint
x[[i]] <- factor(x[[i]], levels = unique(x[[i]]))
}
}
}
}
}

Expand Down
116 changes: 83 additions & 33 deletions R/get_marginalcontrasts.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ get_marginalcontrasts <- function(model,
# check if available
insight::check_if_installed("marginaleffects")

# temporarily overwrite settings that error on "too many" rows
me_option <- getOption("marginaleffects_safe")
options(marginaleffects_safe = FALSE)
on.exit(options(marginaleffects_safe = me_option))


# First step: prepare arguments ---------------------------------------------
# ---------------------------------------------------------------------------
Expand All @@ -22,19 +27,17 @@ get_marginalcontrasts <- function(model,
contrast <- "auto"
}

# check whether contrasts should be made for numerics or categorical
model_data <- insight::get_data(model, source = "mf", verbose = FALSE)
on_the_fly_factors <- attributes(model_data)$factors

# Guess arguments
my_args <- .guess_marginaleffects_arguments(model, by, contrast, verbose = verbose, ...)

# sanitize comparison argument, to ensure compatibility between different
# marginaleffects versions - newer versions don't accept a string argument,
# only formulas (older versions don't accept formulas)
hypothesis_arg <- .get_marginaleffects_hypothesis_argument(comparison, ...)
# update / reset argument
comparison <- hypothesis_arg$comparison

# check whether contrasts should be made for numerics or categorical
model_data <- insight::get_data(model, source = "mf", verbose = FALSE)
on_the_fly_factors <- attributes(model_data)$factors
my_args <- .get_marginaleffects_hypothesis_argument(comparison, my_args, model_data, ...)

# extract first focal term
first_focal <- my_args$contrast[1]
Expand All @@ -60,7 +63,7 @@ get_marginalcontrasts <- function(model,
trend = my_args$contrast,
by = my_args$by,
ci = ci,
hypothesis = hypothesis_arg$hypothesis,
hypothesis = my_args$comparison_slopes,
backend = "marginaleffects",
verbose = verbose,
...
Expand All @@ -71,7 +74,7 @@ get_marginalcontrasts <- function(model,
model = model,
by = unique(c(my_args$contrast, my_args$by)),
ci = ci,
hypothesis = hypothesis_arg$hypothesis,
hypothesis = my_args$comparison,
predict = predict,
backend = "marginaleffects",
marginalize = marginalize,
Expand All @@ -95,7 +98,7 @@ get_marginalcontrasts <- function(model,
info = list(
contrast = my_args$contrast,
predict = predict,
comparison = comparison,
comparison = my_args$comparison,
marginalize = marginalize,
p_adjust = p_adjust
)
Expand All @@ -112,36 +115,83 @@ get_marginalcontrasts <- function(model,

# make "comparison" argument compatible -----------------------------------

.get_marginaleffects_hypothesis_argument <- function(comparison, ...) {
# save original argument
hypothesis <- comparison
# check if we have such a string
.get_marginaleffects_hypothesis_argument <- function(comparison, my_args, model_data = NULL, ...) {
# init
comparison_slopes <- NULL
original_by <- my_args$by

# make sure "by" is a valid column name, and no filter-directive, like "Species='setosa'".
if (!is.null(my_args$by) && any(grepl("[^0-9A-Za-z\\.]", my_args$by))) {
my_args$by <- NULL
}

# convert comparison and by into a formula
if (!is.null(comparison)) {
if (is.character(comparison) &&
comparison %in% .valid_hypothesis_strings() &&
isTRUE(insight::check_if_installed("marginaleffects", quietly = TRUE)) &&
utils::packageVersion("marginaleffects") > "0.24.0") {
# convert to formula
hypothesis <- stats::as.formula(paste("~", comparison))
} else if (inherits(comparison, "formula")) {
# convert to character
comparison_string <- all.vars(comparison)
# update comparison
if (length(comparison_string) == 1 && comparison_string %in% .valid_hypothesis_strings()) {
comparison <- comparison_string
# only proceed if we don't have custom comparisons
if (!.is_custom_comparison(comparison)) {
# if we have a formula as comparison, we convert it into strings in order to
# extract the information for "comparison" and "by", as we need for processing
# in modelbased.
if (inherits(comparison, "formula")) {
# check if we have grouping in the formula, indicated via "|". we split
# the formula into the three single components: lhs ~ rhs | group
f <- insight::trim_ws(unlist(strsplit(insight::safe_deparse(comparison), "[~|]")))
# extract formula parts
formula_lhs <- f[1]
formula_rhs <- f[2]
formula_group <- f[3]
# can be NA when no group
if (is.na(formula_group) || !nzchar(formula_group)) {
# no grouping via formula
formula_group <- NULL
} else {
# else, if we have groups, update by-argument
my_args$by <- formula_group
}
} else {
# sanity check for "comparison" argument
insight::validate_argument(comparison, .valid_hypothesis_strings())
formula_lhs <- "difference"
formula_rhs <- comparison
}
# we put "by" into the formula. user either provided "by", or we put the
# group variable from the formula into "by", hence, "my_args$by" definitely
# contains the requested groups
formula_group <- my_args$by
# compose formula
f <- paste(formula_lhs, "~", paste(formula_rhs, collapse = "+"))
# for contrasts of slopes, we don *not* want the group-variable in the formula
comparison_slopes <- stats::as.formula(f)
# add group variable and update by
if (!is.null(formula_group)) {
f <- paste(f, "|", paste(formula_group, collapse = "+"))
my_args$by <- formula_group
}
comparison <- stats::as.formula(f)
}
} else {
# default to pairwise
comparison <- comparison_slopes <- ~pairwise
}
# we want: "hypothesis" is the original argument provided by the user,
# can be a formula like ~pairwise, or a string like "pairwise". This is
# converted into the appropriate type depending on the marginaleffects
# version. "comparison" should always be a character string, for internal
# processing.
list(hypothesis = hypothesis, comparison = comparison)
# remove "by" from "contrast"
my_args$contrast <- setdiff(my_args$contrast, my_args$by)

c(
# the "my_args" argument, containing "by" and "contrast"
my_args,
list(
# the modifed comparison, as formula, which also includes "by" as group
comparison = comparison,
# the modifed comparison, as formula, excluding "by" as group
comparison_slopes = comparison_slopes,
# the original "by" value, might be required for filtering
# (e.g. when `by = "Species='setosa'"`)
original_by = original_by
)
)
}


# these are the string values that need to be converted to formulas
.valid_hypothesis_strings <- function() {
c(
"pairwise", "reference", "sequential", "meandev", "meanotherdev",
Expand Down
4 changes: 1 addition & 3 deletions R/get_marginaltrends.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ get_marginaltrends <- function(model,
dots <- list(...)

# Guess arguments
trend <- .guess_marginaltrends_arguments(model, trend, by, verbose, ...)

trend <- .guess_marginaltrends_arguments(model, trend, verbose, ...)

# First step: create a data grid --------------------------------------------
# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -93,7 +92,6 @@ get_marginaltrends <- function(model,
#' @keywords internal
.guess_marginaltrends_arguments <- function(model,
trend = NULL,
by = NULL,
verbose = TRUE,
...) {
# Gather info
Expand Down
2 changes: 1 addition & 1 deletion R/standardize_methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ standardize.estimate_contrasts <- function(x, robust = FALSE, ...) {
}

# Standardize relevant cols
for (col in c("Difference", "Coefficient", "SE", "MAD", "CI_low", "CI_high")) {
for (col in c("Difference", "Ratio", "Coefficient", "SE", "MAD", "CI_low", "CI_high")) {
if (col %in% names(x)) {
x[col] <- x[[col]] / disp
}
Expand Down
5 changes: 0 additions & 5 deletions man/estimate_contrasts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit a6a70d8

Please sign in to comment.