fixed score test function to provide more accurate values

jr-leary7 · Nov 10, 2024 · e02f6c0 · e02f6c0
1 parent 8a63798
commit e02f6c0
Show file tree

Hide file tree

Showing 2 changed files with 44 additions and 24 deletions.
diff --git a/R/scoreTestGEE.R b/R/scoreTestGEE.R
@@ -4,6 +4,7 @@
 #' @author Jack R. Leary
 #' @description Performs a basic Lagrange multiplier test to determine whether an alternate model is significantly better than a nested null model. This is the GEE equivalent (kind of) of \code{\link{modelLRT}}. Be careful with small sample sizes.
 #' @importFrom stats model.matrix predict pchisq
+#' @importFrom Matrix bdiag
 #' @param mod.1 The model under the alternative hypothesis. Must be of class \code{geem}. Defaults to NULL.
 #' @param mod.0 The model under the null hypothesis. Must be of class \code{geem}. Defaults to NULL.
 #' @param alt.df The dataframe used to fit the alternative model. Defaults to NULL. 
@@ -13,10 +14,11 @@
 #' @return A list containing the Score test statistic, a \emph{p}-value, and the degrees of freedom used in the test.
 #' @details
 #' \itemize{
-#' \item Calculating the test statistic involves taking the inverse of the variance-covariance matrix of the coefficients. Ideally this would be done using the "true" inverse with something like \code{\link{solve}}, \code{\link{qr.solve}}, or \code{\link{chol2inv}}, but in practice this can cause issues when the variance-covariance matrix is near-singular. With this in mind, we use the Moore-Penrose pseudoinverse as implemented in \code{\link[MASS]{ginv}} instead, which leads to more stable results.
+#' \item Calculating the test statistic involves taking the inverse of the variance of the score vector. Ideally this would be done using the true inverse, but in practice this can cause issues when the matrix is near-singular. With this in mind, we use the Moore-Penrose pseudoinverse if the original matrix inversion fails.
 #' \item The \emph{p}-value is calculated using an asymptotic \eqn{\Chi^2} distribution, with the degrees of freedom equal to the number of non-intercept coefficients in the alternative model.
 #' }
 #' @seealso \code{\link[geeM]{geem}}
+#' @seealso \code{\link[glmtoolbox]{anova2}}
 #' @seealso \code{\link{waldTestGEE}}
 #' @seealso \code{\link{modelLRT}}
 
@@ -46,19 +48,19 @@ scoreTestGEE <- function(mod.1 = NULL,
                 Notes = NA_character_)
   } else {
     rho <- summary(mod.0)$alpha
-    sigma2 <- summary(mod.0)$phi
+    phi <- summary(mod.0)$phi
+    theta <- as.numeric(gsub("\\)", "", gsub(".*\\(", "", mod.1$FunList$family)))
     X_null <- stats::model.matrix(mod.0, data = null.df)
     X_alt <- stats::model.matrix(mod.1, data = alt.df)
-    residuals_null <- null.df$Y - exp(stats::predict(mod.0))
+    r_null <- null.df$Y - stats::predict(mod.0)
     p_alt <- ncol(X_alt)
-    U <- rep(0, p_alt)
-    V_U <- matrix(0, nrow = p_alt, ncol = p_alt)
     groups <- unique(id.vec)
+    i <- 1
+    W <- K <- vector("list", length = length(groups))
     for (i in seq(groups)) {
-      group_indices <- which(id.vec == groups[i])
-      X_alt_i <- X_alt[group_indices, , drop = FALSE]
-      residuals_i <- residuals_null[group_indices]
-      n_i <- length(group_indices)
+      group_idx <- which(id.vec == groups[i])
+      n_i <- length(group_idx)
+      # create working correlation matrix R_i
       if (cor.structure == "independence") {
         R_i <- diag(1, nrow = n_i, ncol = n_i)
       } else if (cor.structure == "exchangeable") {
@@ -67,26 +69,42 @@ scoreTestGEE <- function(mod.1 = NULL,
       } else if (cor.structure == "ar1") {
         R_i <- matrix(rho^abs(outer(seq(n_i), seq(n_i), "-")), nrow = n_i, ncol = n_i)
       }
-      V_i <- sigma2 * R_i
-      R_i_inv <- try({ eigenMapMatrixInvert(V_i) }, silent = TRUE)
-      if (inherits(R_i_inv, "try-error")) {
-        R_i_inv <- eigenMapPseudoInverse(V_i)
+      # create working covariance matrix V_i
+      mu_i <- stats::predict(mod.0)[group_idx]
+      V_mu_i <- mu_i * (1 + mu_i / theta)
+      A_i <- diag(V_mu_i)
+      A_i_sqrt <- sqrt(A_i)  # same as taking the 1/2 power of A_i since A_i is diagonal
+      V_i <- A_i_sqrt %*% R_i %*% A_i_sqrt
+      # create matrices W_i and K_i
+      K_i <- diag(mu_i)
+      V_i_inv <- try({ eigenMapMatrixInvert(V_i) }, silent = TRUE)
+      if (inherits(V_i_inv, "try-error")) {
+        V_i_inv <- eigenMapPseudoInverse(V_i)
       }
-      X_alt_i_t <- t(X_alt_i)
-      U_i <- X_alt_i_t %*% (R_i_inv %*% residuals_i)
-      V_U_i <- X_alt_i_t %*% (R_i_inv %*% X_alt_i)
-      U <- U + U_i
-      V_U <- V_U + V_U_i
+      W_i <- K_i %*% V_i_inv %*% K_i
+      W[[i]] <- W_i
+      K[[i]] <- K_i
     }
+    W <- as.matrix(Matrix::bdiag(W))
+    K <- as.matrix(Matrix::bdiag(K))
+    K_inv <- try({ eigenMapMatrixInvert(K) }, silent = TRUE)
+    if (inherits(K_inv, "try-error")) {
+      K_inv <- eigenMapPseudoInverse(K)
+    }
+    # generate score vector 
+    U <- phi^(-1) * t(X_alt) %*% W %*% K_inv %*% r_null
+    # generate variance of score vector and invert it   
+    V_U <- phi^(-2) * t(X_alt) %*% W %*% X_alt
     V_U_inv <- try({ eigenMapMatrixInvert(V_U) }, silent = TRUE)
     if (inherits(V_U_inv, "try-error")) {
       V_U_inv <- eigenMapPseudoInverse(V_U)
     }
-    test_stat <- as.numeric(t(U) %*% V_U_inv %*% U)
-    df1 <- ncol(X_alt) - ncol(X_null)
-    p_value <- 1 - stats::pchisq(test_stat, df = df1)
-    res <- list(Score_Stat = test_stat,
-                DF = df1,
+    # estimate test statistic and accompanying p-value 
+    S <- t(U) %*% V_U_inv %*% U
+    S_df <- ncol(X_alt) - ncol(X_null)
+    p_value <- 1 - stats::pchisq(S, df = S_df)
+    res <- list(Score_Stat = S,
+                DF = S_df,
                 P_Val = p_value,
                 Notes = NA_character_)
   }

diff --git a/man/scoreTestGEE.Rd b/man/scoreTestGEE.Rd