Skip to content

Commit

Permalink
Merge pull request #270 from jr-leary7/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
jr-leary7 authored Nov 13, 2024
2 parents 3f206ba + fcc3743 commit f0f3927
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 40 deletions.
4 changes: 2 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
+ Switched GEE fitting back to use `scale.fix = FALSE` and substituted a fixed value for the Negative-binomial overdispersion parameter (instead of estimating via method-of-moments) as it improves model fits.
+ Added option to use a Lagrange Multiplier (Score) test for GEE mode instead of the default Wald test. The relevant argument is `gee.test` in `testDynamic()`.
+ Updated documentation and some tests.
+ Added column called `Null_Fit_Notes` to output from `getResultsDE()` to describe when and how null models fail. This doesn't happen frequently, but it's good info to have when it does.
+ Added column called `Null_Fit_Notes` to output from `getResultsDE()` to describe when and how null models fail. This doesn't happen frequently, but it's good info to have when it does.
+ Expanded test suite to include C++ functions.
+ Fixed incorrectly-implemented Lagrange Multiplier test after initial code was seen to be incorrect.
+ Fixed incorrectly-implemented Lagrange Multiplier test after initial code was seen to be incorrect. See [this paper](https://doi.org/10.32614/RJ-2023-056) for the relevant implementation details.

# Changes in v0.8.6

Expand Down
27 changes: 13 additions & 14 deletions R/scoreTestGEE.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
#' @importFrom Matrix bdiag
#' @param mod.1 The model under the alternative hypothesis. Must be of class \code{geem}. Defaults to NULL.
#' @param mod.0 The model under the null hypothesis. Must be of class \code{geem}. Defaults to NULL.
#' @param alt.df The dataframe used to fit the alternative model. Defaults to NULL.
#' @param null.df The dataframe used to fit the null model. Defaults to NULL.
#' @param alt.df The dataframe used to fit the alternative model. Defaults to NULL.
#' @param null.df The dataframe used to fit the null model. Defaults to NULL.
#' @param id.vec A vector of subject IDs to use as input to \code{\link{marge2}}. Defaults to NULL.
#' @param cor.structure A string specifying the working correlation structure used to fit each model. Must be one of "ar1", "independence", or "exchangeable". Defaults to "ar1".
#' @return A list containing the Score test statistic, a \emph{p}-value, and the degrees of freedom used in the test.
Expand All @@ -22,13 +22,13 @@
#' @seealso \code{\link{waldTestGEE}}
#' @seealso \code{\link{modelLRT}}

scoreTestGEE <- function(mod.1 = NULL,
mod.0 = NULL,
alt.df = NULL,
null.df = NULL,
id.vec = NULL,
scoreTestGEE <- function(mod.1 = NULL,
mod.0 = NULL,
alt.df = NULL,
null.df = NULL,
id.vec = NULL,
cor.structure = "ar1") {
# check inputs
# check inputs
if (is.null(mod.1) || is.null(mod.0) || is.null(alt.df) || is.null(null.df) || is.null(id.vec)) { stop("Please provide all inputs to scoreTestGEE().") }
if (inherits(mod.1, "try-error") || inherits(mod.0, "try-error")) {
res <- list(Score_Stat = NA_real_,
Expand All @@ -52,10 +52,9 @@ scoreTestGEE <- function(mod.1 = NULL,
theta <- as.numeric(gsub("\\)", "", gsub(".*\\(", "", mod.1$FunList$family)))
X_null <- stats::model.matrix(mod.0, data = null.df)
X_alt <- stats::model.matrix(mod.1, data = alt.df)
r_null <- null.df$Y - stats::predict(mod.0)
r_null <- null.df$Y - exp(stats::predict(mod.0))
p_alt <- ncol(X_alt)
groups <- unique(id.vec)
i <- 1
W <- K <- vector("list", length = length(groups))
for (i in seq(groups)) {
group_idx <- which(id.vec == groups[i])
Expand All @@ -70,7 +69,7 @@ scoreTestGEE <- function(mod.1 = NULL,
R_i <- matrix(rho^abs(outer(seq(n_i), seq(n_i), "-")), nrow = n_i, ncol = n_i)
}
# create working covariance matrix V_i
mu_i <- stats::predict(mod.0)[group_idx]
mu_i <- exp(stats::predict(mod.0)[group_idx])
V_mu_i <- mu_i * (1 + mu_i / theta)
A_i <- diag(V_mu_i)
A_i_sqrt <- sqrt(A_i) # same as taking the 1/2 power of A_i since A_i is diagonal
Expand All @@ -91,15 +90,15 @@ scoreTestGEE <- function(mod.1 = NULL,
if (inherits(K_inv, "try-error")) {
K_inv <- eigenMapPseudoInverse(K)
}
# generate score vector
# generate score vector
U <- phi^(-1) * t(X_alt) %*% W %*% K_inv %*% r_null
# generate variance of score vector and invert it
# generate variance of score vector and invert it
V_U <- phi^(-2) * t(X_alt) %*% W %*% X_alt
V_U_inv <- try({ eigenMapMatrixInvert(V_U) }, silent = TRUE)
if (inherits(V_U_inv, "try-error")) {
V_U_inv <- eigenMapPseudoInverse(V_U)
}
# estimate test statistic and accompanying p-value
# estimate test statistic and accompanying p-value
S <- t(U) %*% V_U_inv %*% U
S_df <- ncol(X_alt) - ncol(X_null)
p_value <- 1 - stats::pchisq(S, df = S_df)
Expand Down
44 changes: 22 additions & 22 deletions R/testDynamic.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#' @param is.gee Should a GEE framework be used instead of the default GLM? Defaults to FALSE.
#' @param cor.structure If the GEE framework is used, specifies the desired working correlation structure. Must be one of "ar1", "independence", or "exchangeable". Defaults to "ar1".
#' @param gee.bias.correction.method (Optional) Specify which small-sample bias correction to be used on the sandwich variance-covariance matrix prior to test statistic estimation. Options are "kc" and "df". Defaults to NULL, indicating the use of the model-based variance.
#' @param gee.test A string specifying the type of test used to estimate the significance of the full model. Must be one of "wald" or "score". Defaults to "wald".
#' @param gee.test A string specifying the type of test used to estimate the significance of the full model. Must be one of "wald" or "score". Defaults to "score".
#' @param is.glmm Should a GLMM framework be used instead of the default GLM? Defaults to FALSE.
#' @param id.vec If a GEE or GLMM framework is being used, a vector of subject IDs to use as input to \code{\link[geeM]{geem}} or \code{\link[glmmTMB]{glmmTMB}}. Defaults to NULL.
#' @param glmm.adaptive (Optional) Should the basis functions for the GLMM be chosen adaptively? If not, uses 4 evenly spaced knots. Defaults to TRUE.
Expand Down Expand Up @@ -64,7 +64,7 @@ testDynamic <- function(expr.mat = NULL,
is.gee = FALSE,
cor.structure = "ar1",
gee.bias.correction.method = NULL,
gee.test = "wald",
gee.test = "score",
is.glmm = FALSE,
glmm.adaptive = TRUE,
id.vec = NULL,
Expand Down Expand Up @@ -111,10 +111,10 @@ testDynamic <- function(expr.mat = NULL,
colnames(pt) <- paste0("Lineage_", LETTERS[seq(n_lineages)])

# ensure subject ID vector meets criteria for GEE / GLMM fitting
if ((is.gee || is.glmm) && is.null(id.vec)) { stop("You must provide a vector of IDs if you're using GEE / GLMM backends.") }
if ((is.gee || is.glmm) && is.unsorted(id.vec)) { stop("Your data must be ordered by subject, please do so before running testDynamic() with the GEE / GLMM backends.") }
if ((is.gee || is.glmm) && is.null(id.vec)) { stop("You must provide a vector of IDs if you're using GEE / GLMM modes.") }
if ((is.gee || is.glmm) && is.unsorted(id.vec)) { stop("Your data must be ordered by subject, please do so before running testDynamic() with the GEE / GLMM modes.") }
cor.structure <- tolower(cor.structure)
if (is.gee && !(cor.structure %in% c("ar1", "independence", "exchangeable"))) { stop("GEE models require a specified correlation structure.") }
if (is.gee && !(cor.structure %in% c("ar1", "independence", "exchangeable"))) { stop("GEE models require a valid correlation structure.") }
# check GEE testing method
gee.test <- tolower(gee.test)
if (is.gee & !gee.test %in% c("wald", "score")) { stop("GEE testing method must be one of score or wald.") }
Expand Down Expand Up @@ -147,7 +147,7 @@ testDynamic <- function(expr.mat = NULL,
is_read_only = TRUE)

# build list of objects to prevent from being sent to parallel workers
necessary_vars <- c("expr.mat", "genes", "pt", "n.potential.basis.fns", "approx.knot", "is.glmm", "gee.bias.correction.method", "gee.test",
necessary_vars <- c("expr.mat", "genes", "pt", "n.potential.basis.fns", "approx.knot", "is.glmm", "gee.bias.correction.method", "gee.test",
"verbose", "n_lineages", "id.vec", "cor.structure", "is.gee", "gee.scale.fix", "glmm.adaptive", "size.factor.offset")
if (any(ls(envir = .GlobalEnv) %in% necessary_vars)) {
no_export <- c(ls(envir = .GlobalEnv)[-which(ls(envir = .GlobalEnv) %in% necessary_vars)],
Expand Down Expand Up @@ -320,19 +320,19 @@ testDynamic <- function(expr.mat = NULL,
lineage_list[[j]] <- list(Gene = genes[i],
Lineage = LETTERS[j],
Test_Stat = NA_real_,
Test_Stat_Type = ifelse(is.gee,
ifelse(gee.test == "wald", "Wald", "Score"),
Test_Stat_Type = ifelse(is.gee,
ifelse(gee.test == "wald", "Wald", "Score"),
"LRT"),
Test_Stat_Note = NA_character_,
Degrees_Freedom = NA_real_,
Degrees_Freedom = NA_real_,
P_Val = NA_real_,
LogLik_MARGE = marge_sumy$ll_marge,
LogLik_Null = null_sumy$null_ll,
Dev_MARGE = marge_sumy$marge_dev,
Dev_Null = null_sumy$null_dev,
Model_Status = mod_status,
MARGE_Fit_Notes = marge_sumy$marge_fit_notes,
Null_Fit_Notes = null_sumy$null_fit_notes,
Null_Fit_Notes = null_sumy$null_fit_notes,
Gene_Time = gene_time_end_numeric,
MARGE_Summary = marge_sumy$marge_sumy_df,
Null_Summary = null_sumy$null_sumy_df,
Expand All @@ -350,11 +350,11 @@ testDynamic <- function(expr.mat = NULL,
id.vec = id.vec[lineage_cells],
verbose = verbose)
} else if (gee.test == "score") {
test_res <- scoreTestGEE(mod.1 = marge_mod,
mod.0 = null_mod,
alt.df = as.data.frame(marge_mod$basis_mtx),
null.df = null_mod_df,
id.vec = id.vec[lineage_cells],
test_res <- scoreTestGEE(mod.1 = marge_mod,
mod.0 = null_mod,
alt.df = as.data.frame(marge_mod$basis_mtx),
null.df = null_mod_df,
id.vec = id.vec[lineage_cells],
cor.structure = cor.structure)
}
} else {
Expand All @@ -363,11 +363,11 @@ testDynamic <- function(expr.mat = NULL,
is.glmm = is.glmm)
}
# add test stats to result list
lineage_list[[j]]$Test_Stat <- ifelse(is.gee,
ifelse(gee.test == "wald", test_res$Wald_Stat, test_res$Score_Stat),
lineage_list[[j]]$Test_Stat <- ifelse(is.gee,
ifelse(gee.test == "wald", test_res$Wald_Stat, test_res$Score_Stat),
test_res$LRT_Stat)
lineage_list[[j]]$Test_Stat_Note <- test_res$Notes
lineage_list[[j]]$Degrees_Freedom <- test_res$DF
lineage_list[[j]]$Degrees_Freedom <- test_res$DF
lineage_list[[j]]$P_Val <- test_res$P_Val
}
names(lineage_list) <- paste0("Lineage_", LETTERS[seq(n_lineages)])
Expand All @@ -391,11 +391,11 @@ testDynamic <- function(expr.mat = NULL,
list(Gene = y,
Lineage = LETTERS[z],
Test_Stat = NA_real_,
Test_Stat_Type = ifelse(is.gee,
ifelse(gee.test == "wald", "Wald", "Score"),
Test_Stat_Type = ifelse(is.gee,
ifelse(gee.test == "wald", "Wald", "Score"),
"LRT"),
Test_Stat_Note = NA_character_,
Degrees_Freedom = NA_real_,
Degrees_Freedom = NA_real_,
P_Val = NA_real_,
LogLik_MARGE = NA_real_,
LogLik_Null = NA_real_,
Expand All @@ -404,7 +404,7 @@ testDynamic <- function(expr.mat = NULL,
Model_Status = x[1],
Gene_Time = NA_real_,
MARGE_Fit_Notes = NA_character_,
Null_Fit_Notes = NA_character_,
Null_Fit_Notes = NA_character_,
MARGE_Summary = NULL,
Null_Summary = NULL,
MARGE_Preds = NULL,
Expand Down
4 changes: 2 additions & 2 deletions man/testDynamic.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit f0f3927

Please sign in to comment.