Prepare CRAN release (#369)

* Prepare CRAN release * styler * structure code * comment code, simplify code * minor print * Update get_marginalmeans.R * fix issues with latest marginaleffects, update tests * Update get_marginaltrends.R * update readme * update test * fix johnson neymann * update tests * update readme * update readme * fix * Update DESCRIPTION * styler * remotes * rename marginalization -> estimate * "average" -> "sample" * Update WORDLIST * Update get_marginalcontrasts.R * update snapshots * update snapshots * update * Fix #377 * fix test * fix * harmonize order of arguments * typoe * styler * add ATT/ATU/ATE options * more informative error for g-computation * minor * lintr * add test * add test
easystats · Feb 5, 2025 · 3545cd1 · 3545cd1
1 parent d90ac3b
commit 3545cd1
Show file tree

Hide file tree

Showing 52 changed files with 1,053 additions and 552 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: modelbased
 Title: Estimation of Model-Based Predictions, Contrasts and Means
-Version: 0.8.9.107
+Version: 0.9.0
 Authors@R:
     c(person(given = "Dominique",
              family = "Makowski",
@@ -63,8 +63,9 @@ Suggests:
     lmerTest,
     logspline,
     MASS,
-    marginaleffects,
+    marginaleffects (>= 0.25.0),
     mgcv,
+    nanoparquet,
     performance (>= 0.13.0),
     patchwork,
     pbkrtest,
@@ -89,4 +90,3 @@ Roxygen: list(markdown = TRUE)
 Config/Needs/check: stan-dev/cmdstanr
 Config/Needs/website: easystats/easystatstemplate
 LazyData: true
-Remotes: vincentarelbundock/marginaleffects
diff --git a/NEWS.md b/NEWS.md
@@ -13,7 +13,11 @@
 - Argument `fixed` has been removed, as you can fix predictor at certain values
   using the `by` argument.
 
-- Argument `transform` is deprecated. Please use `predict` instead.
+- Argument `transform` is no longer used to determine the scale of the predictions.
+  Please use `predict` instead.
+
+- Argument `transform` is now used to (back-) transform predictions and confidence
+  intervals.
 
 - Argument `method` in `estimate_contrasts()` was renamed into `comparison`.
 
@@ -35,8 +39,8 @@
   predictions should be on the response scale, link scale, etc.). It can also
   be used to predict auxiliary (distributional) parameters.
 
-- `estimate_means()` and `estimate_contrasts()` get a `marginalize` argument,
-  to specify how to marginalize over non-focal terms. This results in slightly
+- `estimate_means()` and `estimate_contrasts()` get an `estimate` argument,
+  to specify how to average over non-focal terms. This results in slightly
   different predicted values, each approach answering a different question.
 
 - `estimate_contrasts()` gains a `backend` argument. This defaults to

diff --git a/R/estimate_contrasts.R b/R/estimate_contrasts.R
@@ -104,19 +104,13 @@ estimate_contrasts.default <- function(model,
                                        by = NULL,
                                        predict = NULL,
                                        ci = 0.95,
-                                       p_adjust = "none",
                                        comparison = "pairwise",
-                                       marginalize = "average",
-                                       backend = getOption("modelbased_backend", "marginaleffects"),
+                                       estimate = "average",
+                                       p_adjust = "none",
                                        transform = NULL,
+                                       backend = getOption("modelbased_backend", "marginaleffects"),
                                        verbose = TRUE,
                                        ...) {
-  ## TODO: remove deprecation warning later
-  if (!is.null(transform)) {
-    insight::format_warning("Argument `transform` is deprecated. Please use `predict` instead.")
-    predict <- transform
-  }
-
   if (backend == "emmeans") {
     # Emmeans ------------------------------------------------------------------
     estimated <- get_emcontrasts(model,
@@ -138,7 +132,8 @@ estimate_contrasts.default <- function(model,
       comparison = comparison,
       p_adjust = p_adjust,
       ci = ci,
-      marginalize = marginalize,
+      estimate = estimate,
+      transform = transform,
       verbose = verbose,
       ...
     )
@@ -150,7 +145,7 @@ estimate_contrasts.default <- function(model,
 
   # Table formatting
   attr(out, "table_title") <- c(ifelse(
-    marginalize == "specific",
+    estimate == "specific",
     "Model-based Contrasts Analysis",
     "Marginal Contrasts Analysis"
   ), "blue")

diff --git a/R/estimate_means.R b/R/estimate_means.R
@@ -41,18 +41,20 @@
 #' packages), for instance when using complex formulae in `brms` models, the
 #' `predict` argument can take the value of the parameter you want to estimate,
 #' for instance `"sigma"`, `"kappa"`, etc.
-#' @param marginalize Character string, indicating the type of marginalization.
-#' This dictates how the predictions are "averaged" over the non-focal predictors,
-#' i.e. those variables that are not specified in `by` or `contrast`.
+#' @param estimate Character string, indicating the type of target population
+#' predictions refer to. This dictates how the predictions are "averaged" over
+#' the non-focal predictors, i.e. those variables that are not specified in
+#' `by` or `contrast`.
 #' - `"average"` (default): Takes the mean value for non-focal numeric
 #'   predictors and marginalizes over the factor levels of non-focal terms,
 #'   which computes a kind of "weighted average" for the values at which these
 #'   terms are held constant. These predictions are a good representation of the
 #'   sample, because all possible values and levels of the non-focal predictors
 #'   are considered. It answers the question, "What is the predicted value for
-#'   an 'average' observation in *my data*?". It refers to randomly picking a
-#'   subject of your sample and the result you get on average. This approach is
-#'   the one taken by default in the `emmeans` package.
+#'   an 'average' observation in *my data*?". Cum grano salis, it refers to
+#'   randomly picking a subject of your sample and the result you get on
+#'   average. This approach is the one taken by default in the `emmeans`
+#'   package.
 #' - `"population"`: Non-focal predictors are marginalized over the observations
 #'   in the sample, where the sample is replicated multiple times to produce
 #'   "counterfactuals" and then takes the average of these predicted values
@@ -65,15 +67,15 @@
 #'   your observed sample, but also "what would be if" we had more data, or if
 #'   we had data from a different sample.
 #'
-#' In other words, the distinction between marginalization types resides in whether
+#' In other words, the distinction between estimate types resides in whether
 #' the predictions are made for:
 #' - A specific "individual" from the sample (i.e., a specific combination of
 #'   predictor values): this is what is obtained when using [`estimate_relation()`]
 #'   and the other prediction functions.
 #' - An average individual from the sample: obtained with
-#'   `estimate_means(..., marginalize = "average")`
+#'   `estimate_means(..., estimate = "average")`
 #' - The broader, hypothetical target population: obtained with
-#'   `estimate_means(..., marginalize = "population")`
+#'   `estimate_means(..., estimate = "population")`
 #' @param backend Whether to use `"emmeans"` or `"marginaleffects"` as a backend.
 #' Results are usually very similar. The major difference will be found for mixed
 #' models, where `backend = "marginaleffects"` will also average across random
@@ -84,7 +86,13 @@
 #' `options(modelbased_backend = "emmeans")` to use the **emmeans** package or
 #' `options(modelbased_backend = "marginaleffects")` to set **marginaleffects**
 #' as default backend.
-#' @param transform Deprecated, please use `predict` instead.
+#' @param transform A function applied to predictions and confidence intervals
+#' to (back-) transform results, which can be useful in case the regression
+#' model has a transformed response variable (e.g., `lm(log(y) ~ x)`). For
+#' Bayesian models, this function is applied to individual draws from the
+#' posterior distribution, before computing summaries. Can also be `TRUE`, in
+#' which case `insight::get_transformation()` is called to determine the
+#' appropriate transformation-function.
 #' @param verbose Use `FALSE` to silence messages and warnings.
 #' @param ... Other arguments passed, for instance, to [insight::get_datagrid()],
 #' to functions from the **emmeans** or **marginaleffects** package, or to process
@@ -93,8 +101,8 @@
 #'   to control the (number of) representative values.
 #' - **marginaleffects**: Internally used functions are `avg_predictions()` for
 #'   means and contrasts, and `avg_slopes()` for slopes. Therefore, arguments
-#'   for instance like `vcov`, `transform`, `equivalence` or `slope` can be
-#'   passed to those functions.
+#'   for instance like `vcov`, `transform`, `equivalence`, `slope` or even
+#'   `newdata` can be passed to those functions.
 #' - **emmeans**: Internally used functions are `emmeans()` and `emtrends()`.
 #'   Additional arguments can be passed to these functions.
 #' - Bayesian models: For Bayesian models, parameters are cleaned using
@@ -172,30 +180,39 @@ estimate_means <- function(model,
                            by = "auto",
                            predict = NULL,
                            ci = 0.95,
-                           marginalize = "average",
-                           backend = getOption("modelbased_backend", "marginaleffects"),
+                           estimate = "average",
                            transform = NULL,
+                           backend = getOption("modelbased_backend", "marginaleffects"),
                            verbose = TRUE,
                            ...) {
-  ## TODO: remove deprecation warning later
-  if (!is.null(transform)) {
-    insight::format_warning("Argument `transform` is deprecated. Please use `predict` instead.")
-    predict <- transform
-  }
-
   # validate input
-  marginalize <- insight::validate_argument(
-    marginalize,
+  estimate <- insight::validate_argument(
+    estimate,
     c("average", "population", "specific")
   )
 
   if (backend == "emmeans") {
     # Emmeans ------------------------------------------------------------------
-    estimated <- get_emmeans(model, by = by, predict = predict, verbose = verbose, ...)
+    estimated <- get_emmeans(
+      model,
+      by = by,
+      predict = predict,
+      verbose = verbose,
+      ...
+    )
     means <- .format_emmeans_means(estimated, model, ci = ci, verbose = verbose, ...)
   } else {
     # Marginalmeans ------------------------------------------------------------
-    estimated <- get_marginalmeans(model, by = by, predict = predict, ci = ci, marginalize = marginalize, verbose = verbose, ...) # nolint
+    estimated <- get_marginalmeans(
+      model,
+      by = by,
+      predict = predict,
+      ci = ci,
+      estimate = estimate,
+      transform = transform,
+      verbose = verbose,
+      ...
+    )
     means <- format(estimated, model, ...)
   }
 
@@ -204,13 +221,13 @@ estimate_means <- function(model,
 
   # Table formatting
   attr(means, "table_title") <- c(ifelse(
-    marginalize == "specific",
+    estimate == "specific",
     "Model-based Predictions",
     "Estimated Marginal Means"
   ), "blue")
   attr(means, "table_footer") <- .table_footer(
     means,
-    type = ifelse(marginalize == "specific", "predictions", "means"),
+    type = ifelse(estimate == "specific", "predictions", "means"),
     by = info$by,
     model = model,
     info = info

diff --git a/R/estimate_predicted.R b/R/estimate_predicted.R
@@ -173,6 +173,12 @@
 #' you are directly predicting the value of some distributional parameter), and
 #' the corresponding functions will then only differ in the default value of
 #' their `data` argument.
+#' @param transform A function applied to predictions and confidence intervals
+#' to (back-) transform results, which can be useful in case the regression
+#' model has a transformed response variable (e.g., `lm(log(y) ~ x)`). Can also
+#' be `TRUE`, in which case `insight::get_transformation()` is called to
+#' determine the appropriate transformation-function. **Note:** Standard errors
+#' are not (back-) transformed!
 #' @param ... You can add all the additional control arguments from
 #' [insight::get_datagrid()] (used when `data = "grid"`) and
 #' [insight::get_predicted()].
@@ -228,6 +234,7 @@ estimate_expectation <- function(model,
                                  by = NULL,
                                  predict = "expectation",
                                  ci = 0.95,
+                                 transform = NULL,
                                  keep_iterations = FALSE,
                                  ...) {
   .estimate_predicted(
@@ -237,6 +244,7 @@ estimate_expectation <- function(model,
     ci = ci,
     keep_iterations = keep_iterations,
     predict = predict,
+    transform = transform,
     ...
   )
 }
@@ -249,6 +257,7 @@ estimate_link <- function(model,
                           by = NULL,
                           predict = "link",
                           ci = 0.95,
+                          transform = NULL,
                           keep_iterations = FALSE,
                           ...) {
   # reset to NULL if only "by" was specified
@@ -263,6 +272,7 @@ estimate_link <- function(model,
     ci = ci,
     keep_iterations = keep_iterations,
     predict = predict,
+    transform = transform,
     ...
   )
 }
@@ -274,6 +284,7 @@ estimate_prediction <- function(model,
                                 by = NULL,
                                 predict = "prediction",
                                 ci = 0.95,
+                                transform = NULL,
                                 keep_iterations = FALSE,
                                 ...) {
   .estimate_predicted(
@@ -283,6 +294,7 @@ estimate_prediction <- function(model,
     ci = ci,
     keep_iterations = keep_iterations,
     predict = predict,
+    transform = transform,
     ...
   )
 }
@@ -294,6 +306,7 @@ estimate_relation <- function(model,
                               by = NULL,
                               predict = "expectation",
                               ci = 0.95,
+                              transform = NULL,
                               keep_iterations = FALSE,
                               ...) {
   # reset to NULL if only "by" was specified
@@ -308,6 +321,7 @@ estimate_relation <- function(model,
     ci = ci,
     keep_iterations = keep_iterations,
     predict = predict,
+    transform = transform,
     ...
   )
 }
@@ -321,6 +335,7 @@ estimate_relation <- function(model,
                                 by = NULL,
                                 predict = "expectation",
                                 ci = 0.95,
+                                transform = NULL,
                                 keep_iterations = FALSE,
                                 ...) {
   # only "by" or "data", but not both
@@ -445,6 +460,16 @@ estimate_relation <- function(model,
     out$Residuals <- response - out$Predicted
   }
 
+  # transform response?
+  if (isTRUE(transform)) {
+    transform <- insight::get_transformation(model, verbose = FALSE)$inverse
+  }
+  if (!is.null(transform)) {
+    out$Predicted <- transform(out$Predicted)
+    out$CI_low <- transform(out$CI_low)
+    out$CI_high <- transform(out$CI_high)
+  }
+
   # Store relevant information
   attr(out, "ci") <- ci
   attr(out, "keep_iterations") <- keep_iterations