From d4ceb8dea1509cbc4bd9133d38e38fa7114b514a Mon Sep 17 00:00:00 2001
From: "Rossum, Bart-Jan van" <bart-jan.vanrossum@wur.nl>
Date: Fri, 7 Jun 2019 09:52:34 +0200
Subject: [PATCH] Added option spatial to fitModels for asreml (#9)

---
 .gitlab-ci.yml   |   4 +-
 DESCRIPTION      |   5 +-
 NAMESPACE        |   1 +
 R/fitModels.R    | 183 ++++++++++++++++++++++++++++++++++++++++++++---
 R/statgenHTP.R   |   2 +-
 README.md        |   4 +-
 man/fitModels.Rd |   7 +-
 testScript.R     |  29 +++++---
 8 files changed, 210 insertions(+), 25 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index b53c7bb..464a40d 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -17,12 +17,14 @@ before_script:
   - apt-get update && apt-get install -y libxml2-dev libssl-dev libcurl4-openssl-dev zlib1g-dev libglu1-mesa-dev libfreetype6-dev > /dev/null
   - apt-get update && apt-get install -y --no-install-recommends qpdf ghostscript > /dev/null
   ## install pandoc - needed for checking readme.
-  - apt-get update && apt-get install -y pandoc > /dev/null
+  - apt-get update && apt-get install -y pandoc pandoc-citeproc > /dev/null
   ## install ImageMagick++ for timelapse.
   - apt-get update && apt-get install -y libmagick++-dev > /dev/null
   ## cache R packages.
   - mkdir -p $R_LIBS_USER
   - echo 'R_LIBS_USER='$R_LIBS_USER > .Renviron
+  ## Set _R_CHECK_FORCE_SUGGESTS_ to FALSE so R check doesn't crash over asreml.
+  - echo '_R_CHECK_FORCE_SUGGESTS_="false"' >> .Renviron
 
 R-release:
   stage: build
diff --git a/DESCRIPTION b/DESCRIPTION
index 004cd89..07184c6 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -11,7 +11,7 @@ Description: What the package does (one paragraph).
 License: GPL
 Encoding: UTF-8
 LazyData: true
-Imports: 
+Imports:
     ggforce,
     ggplot2,
     lubridate,
@@ -19,6 +19,7 @@ Imports:
     scales,
     SpATS,
     animation
-RoxygenNote: 6.1.1
 Suggests: 
+    asreml (>= 4.0),
     covr
+RoxygenNote: 6.1.1
diff --git a/NAMESPACE b/NAMESPACE
index 5a2da90..34b4a30 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -19,5 +19,6 @@ import(stats)
 importFrom(SpATS,predict.SpATS)
 importFrom(grDevices,topo.colors)
 importFrom(graphics,plot)
+importFrom(utils,capture.output)
 importFrom(utils,hasName)
 importFrom(utils,write.csv)
diff --git a/R/fitModels.R b/R/fitModels.R
index 2de663f..1f397cd 100644
--- a/R/fitModels.R
+++ b/R/fitModels.R
@@ -11,6 +11,9 @@
 #' group the genotypes in the model.
 #' @param useCheck Should check genotypes be used as an extra factor in the
 #' model?
+#' @param engine A character string indicating the engine used to fit the
+#' models.
+#' @param spatial Should a spatial model be fitted for asreml?
 #'
 #' @return An object of class fitMod, a list of fitted spatial models.
 #'
@@ -20,7 +23,8 @@ fitModels <- function(TP,
                       covariates = NULL,
                       geno.decomp = NULL,
                       useCheck = FALSE,
-                      engine = c("SpATS", "asreml")) {
+                      engine = c("SpATS", "asreml"),
+                      spatial = FALSE) {
   ## Checks.
   if (!inherits(TP, "TP")) {
     stop("TP should be an object of class TP.\n")
@@ -117,7 +121,7 @@ fitModels <- function(TP,
     ## fixed in asreml needs response variable on lhs of formula.
     fixedForm <- update(fixedForm, paste(trait, "~ ."))
     ## Construct formula for random part of the model.
-    randForm <- formula(paste("~ colId + ", if (is.null(geno.decomp)) genoCol else
+    randForm <- formula(paste("~ ", if (is.null(geno.decomp)) genoCol else
       paste0("at(", geno.decomp, "):genotype")))
     ## Loop on timepoint to run asreml.
     fitMods <- lapply(X = TP, function(timePoint) {
@@ -125,19 +129,176 @@ fitModels <- function(TP,
       ## Only keep columns needed for analysis.
       modDat <- droplevels(timePoint)
       ## Run model.
-      asrFit <- asreml::asreml(fixed = update(fixedForm, paste(trait, "~ .")),
-                               random = randForm, data = modDat, trace = FALSE,
-                               na.action = asreml::na.method(x = "include"),
-                               maxiter = 200)
-      ## evaluate call terms in mr and mfTrait so predict can be run.
-      asrFit$call$fixed <- eval(asrFit$call$fixed)
-      asrFit$call$random <- eval(asrFit$call$random)
-      asrFit$call$data <- substitute(modDat)
+      if (!spatial) {
+        asrFit <- asreml::asreml(fixed = update(fixedForm, paste(trait, "~ .")),
+                                 random = randForm, data = modDat,
+                                 trace = FALSE, maxiter = 200,
+                                 na.action = asreml::na.method(x = "include"))
+        ## evaluate call terms in mr and mfTrait so predict can be run.
+        asrFit$call$fixed <- eval(asrFit$call$fixed)
+        asrFit$call$random <- eval(asrFit$call$random)
+        asrFit$call$data <- substitute(modDat)
+      } else {
+        asrFitSpat <- bestSpatMod(modDat = modDat, traits = trait,
+                                  fixedForm = fixedForm, randomForm = randForm)
+        asrFit <- asrFitSpat[["fitMods"]][[trait]]
+        attr(x = asrFit, which = "sumTab") <- asrFitSpat[["sumTab"]]
+      }
       return(asrFit)
     })
   }
   return(createFitMod(fitMods))
 }
 
-
+#' Helper function for calculating best spatial model using asreml.
+#' @noRd
+#' @keywords internal
+bestSpatMod <- function(modDat,
+                        traits,
+                        regular = FALSE,
+                        criterion = "AIC",
+                        fixedForm,
+                        randomForm,
+                        ...) {
+  dotArgs <- list(...)
+  ## Increase max number of iterations for asreml.
+  maxIter <- 200
+  ## Add empty observations.
+  TPTab <- as.data.frame(table(modDat[["colId"]], modDat[["rowId"]]))
+  TPTab <- TPTab[TPTab$Freq == 0, , drop = FALSE]
+  if (nrow(TPTab) > 0) {
+    extObs <- setNames(as.data.frame(matrix(nrow = nrow(TPTab),
+                                            ncol = ncol(modDat))),
+                       colnames(modDat))
+    extObs[["timePoint"]] <- modDat[["timePoint"]][1]
+    extObs[, c("colId", "rowId")] <- TPTab[, c("Var1", "Var2")]
+    extObs[, c("colNum", "rowNum")] <-
+      c(as.numeric(levels(TPTab[, "Var1"]))[TPTab[, "Var1"]],
+        as.numeric(levels(TPTab[, "Var2"]))[TPTab[, "Var2"]])
+    modDat <- rbind(modDat, extObs)
+  }
+  ## TP needs to be sorted by row and column to prevent asreml from crashing.
+  modDat <- modDat[order(modDat[["rowId"]], modDat[["colId"]]), ]
+  ## Define random terms of models to try.
+  randTerm <- c("NULL", rep(x = c("NULL", "units"), each = 3))
+  if (regular) {
+    ## Define spatial terms of models to try.
+    spatCh <- c("none", rep(x = c("exp(x)id", "id(x)exp",
+                                  "isotropic exponential"), times = 2))
+    spatTerm <- c(NA, paste("~", rep(x = c("exp(rowNum):colNum",
+                                           "rowNum:exp(colNum)",
+                                           "iexp(rowNum,colNum)"),
+                                     times = 2)))
+  } else {
+    spatCh <- c("none", rep(x = c("AR1(x)id", "id(x)AR1", "AR1(x)AR1"),
+                            times = 2))
+    spatTerm <- c(NA, paste("~",
+                            rep(x = c("ar1(rowId):colId", "rowId:ar1(colId)",
+                                      "ar1(rowId):ar1(colId)"), times = 2)))
+  }
+  ## Create empty base lists.
+  fitMods <- spatial <- sumTab <- setNames(vector(mode = "list",
+                                             length = length(traits)),
+                                      traits)
+  btCols <- c("spatial", "random", "AIC", "BIC", "row", "col", "error",
+              "correlated error", "converge")
+  for (trait in traits) {
+    ## Reset criterion to Inf.
+    criterionBest <- Inf
+    ## Create data.frame for storing summary for current trait.
+    modSum <- as.data.frame(matrix(nrow = length(spatCh), ncol = length(btCols),
+                                   dimnames = list(NULL, btCols)))
+    ## Fit model with genotype random for all different random/spatial terms.
+    for (i in seq_along(randTerm)) {
+      ## Add extra random term to random part.
+      randForm <- update(randomForm, paste("~ . + rowId + colId +", randTerm[i]))
+      asrArgs <- c(list(fixed = fixedForm, random = randForm, aom = TRUE,
+                         data = modDat, maxiter = maxIter, trace = FALSE,
+                         na.action = asreml::na.method(x = "include")),
+                    dotArgs)
+      if (!is.na(spatTerm[i])) {
+        asrArgs[["residual"]] <- formula(spatTerm[i])
+      }
+      capture.output(fitMod <- tryCatchExt(do.call(what = asreml::asreml,
+                                                    args = asrArgs)),
+                     file = tempfile())
+      if (!is.null(fitMod$warning)) {
+        fitMod <- chkLastIter(fitMod)
+        fitMod <- wrnToErr(fitMod)
+      }
+      if (length(fitMod$warning) != 0) {
+        warning(paste0("Warning in asreml for model ", spatCh[i],
+                       " genotype random, trait ", trait, " in timePoint ",
+                       modDat[["timePoint"]][1], ":\n", fitMod$warning,
+                       "\n"), call. = FALSE)
+      }
+      if (is.null(fitMod$error)) {
+        fitMod <- fitMod$value
+      } else {
+        warning(paste0("Error in asreml for model ", spatCh[i],
+                       " genotype random, trait ", trait, " in timePoint ",
+                       modDat[["timePoint"]][1], ":\n", fitMod$error,
+                       "\n"), call. = FALSE)
+        fitMod <- NULL
+      }
+      ## Fill model summary table.
+      modSum[i, "spatial"] <- spatCh[i]
+      modSum[i, "random"] <- randTerm[i]
+      modSum[i, "converge"] <- isTRUE(!is.null(fitMod) & fitMod$converge)
+      if (!is.null(fitMod)) {
+        summ <- summary(fitMod)$varcomp["component"]
+        modSum[i, "AIC"] <- -2 * fitMod$loglik + 2 * nrow(summ)
+        modSum[i, "BIC"] <- -2 * fitMod$loglik +
+          log(length(fitted(fitMod))) * nrow(summ)
+        ## Row and column output differs for regular/non-regular.
+        ## Always max. one of the possibilities is in summary so rowVal and
+        ## colVal are always a single value.
+        rowVal <- summ[rownames(summ) %in%
+                         c("rowId:colId!rowId!cor", "rowNum:colNum!rowNum!pow",
+                           "iexp(rowNum,colNum)!pow"), ]
+        modSum[i, "row"] <- ifelse(length(rowVal) == 0, NA, rowVal)
+        colVal <- summ[rownames(summ) %in%
+                         c("rowId:colId!colId!cor", "rowNum:colNum!colNum!pow",
+                           "iexp(rowNum,colNum)!pow"), ]
+        modSum[i, "col"] <- ifelse(length(colVal) == 0, NA, colVal)
+        modSum[i, "error"] <-
+          ifelse(randTerm[i] == "units", summ[rownames(summ) == "units", ],
+                 summ[rownames(summ) %in% c("units!R", "rowId:colId!R",
+                                            "rowNum:colNum!R",
+                                            "iexp(rowNum,colNum)!R"), ])
+        if (randTerm[i] == "units") {
+          modSum[i, "correlated error"] <-
+            summ[rownames(summ) %in% c("rowId:colId!R", "rowNum:colNum!R",
+                                       "iexp(rowNum,colNum)!R"), ]
+        }
+        ## If current model is better than best so far based on chosen criterion
+        ## define best model as current model.
+        if (fitMod$converge) {
+          if (criterion == "AIC") {
+            criterionCur <- modSum[i, "AIC"]
+          } else {
+            criterionCur <- modSum[i, "BIC"]
+          }
+        }
+        if (criterionCur < criterionBest) {
+          bestModTr <- fitMod
+          ## Evaluate call terms in bestModTr so predict can be run.
+          ## Needs to be called in every iteration to prevent final result
+          ## from always having the values of the last iteration.
+          bestModTr$call$fixed <- eval(bestModTr$call$fixed)
+          bestModTr$call$random <- eval(bestModTr$call$random)
+          bestModTr$call$residual <- eval(bestModTr$call$residual)
+          bestModTr$call$data <- substitute(modDat)
+          criterionBest <- criterionCur
+          bestMod <- i
+        }
+      }
+    }
+    fitMods[[trait]] <- bestModTr
+    spatial[[trait]] <- spatCh[bestMod]
+    attr(x = modSum, which = "chosen") <- bestMod
+    sumTab[[trait]] <- modSum
+  } # End for traits.
+  return(list(fitMods = fitMods, spatial = spatial, sumTab = sumTab))
+}
 
diff --git a/R/statgenHTP.R b/R/statgenHTP.R
index 725026b..e3769b5 100644
--- a/R/statgenHTP.R
+++ b/R/statgenHTP.R
@@ -4,7 +4,7 @@
 #'
 #' @docType package
 #' @name statgenHTP
-#' @importFrom utils hasName write.csv
+#' @importFrom utils hasName write.csv capture.output
 #' @importFrom graphics plot
 #' @importFrom grDevices topo.colors
 #' @import stats
diff --git a/README.md b/README.md
index d12292b..9a4584a 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,9 @@
+# statgenHTP
+
 <!-- badges: start -->
 [![pipeline status](https://git.wur.nl/rossu027/statgenHTP/badges/master/pipeline.svg)](https://git.wur.nl/rossu027/statgenHTP/commits/master)
 [![coverage report](https://git.wur.nl/rossu027/statgenHTP/badges/master/coverage.svg)](https://git.wur.nl/rossu027/statgenHTP/commits/master)
 <!-- badges: end -->
 
-# statgenHTP
+
 
diff --git a/man/fitModels.Rd b/man/fitModels.Rd
index f292cd4..1a60afe 100644
--- a/man/fitModels.Rd
+++ b/man/fitModels.Rd
@@ -5,7 +5,7 @@
 \title{Fit spatial models per time point}
 \usage{
 fitModels(TP, trait, covariates = NULL, geno.decomp = NULL,
-  useCheck = FALSE, engine = c("SpATS", "asreml"))
+  useCheck = FALSE, engine = c("SpATS", "asreml"), spatial = FALSE)
 }
 \arguments{
 \item{TP}{An object of class TP.}
@@ -21,6 +21,11 @@ group the genotypes in the model.}
 
 \item{useCheck}{Should check genotypes be used as an extra factor in the
 model?}
+
+\item{engine}{A character string indicating the engine used to fit the
+models.}
+
+\item{spatial}{Should a spatial model be fitted for asreml?}
 }
 \value{
 An object of class fitMod, a list of fitted spatial models.
diff --git a/testScript.R b/testScript.R
index f1593d4..7061740 100644
--- a/testScript.R
+++ b/testScript.R
@@ -35,15 +35,23 @@ plot(inTP, plotType = "raw", traits = "pheno",
      genotypes = c("col", "ely", "evo1", "ler"))
 
 fitMods <- fitModels(TP = inTP[1:3], trait = "pheno",
-                     covariates = c("repId", "Image_pos"))
+                     covariates = c("repId", "Image_pos"), useCheck = TRUE)
 fitMods1b <- fitModels(TP = inTP[1:3], trait = "pheno",
                        covariates = c("repId", "Image_pos"), engine = "asreml")
 
+fitMods1c <- fitModels(TP = inTP[1:3], trait = "pheno",
+                       covariates = c("repId", "Image_pos"), engine = "asreml",
+                       useCheck = TRUE)
+
 genoPreds <- getGenoPred(fitMods, outFile = "BLUPs_PAM_modRep.csv")
+colPreds <- getColPred(fitMods)
 genoPreds1b <- getGenoPred(fitMods1b)
+genoPreds1c <- getGenoPred(fitMods1c)
+colPreds1c <- getColPred(fitMods1c)
 spatCorr <- getCorrected(fitMods, outFile = "Corrected_PAM_modRep.csv")
 variance <- getVar(fitMods)
 h2 <- getHerit(fitMods)
+genoBLUPS <- getBLUPsGeno(fitMods)
 
 plot(fitMods, plotType = "corrPred")
 plot(fitMods, plotType = "rawPred")
@@ -69,22 +77,27 @@ pdf("Phenovator_ZA17_raw_data_na.pdf", height = 8, width = 12)
 plot(inTP2, plotType = "raw", traits = "LA_Estimated", geno.decomp = "Scenario")
 dev.off()
 
-fitMods2 <- fitModels(TP = inTP2[1:1], trait = "LA_Estimated",
+fitMods2 <- fitModels(TP = inTP2[10:10], trait = "LA_Estimated",
                       geno.decomp = c("Scenario", "population"))
-fitMods2b <- fitModels(TP = inTP2[1:1], trait = "LA_Estimated",
+fitMods2b <- fitModels(TP = inTP2[10:10], trait = "LA_Estimated",
                        geno.decomp = c("Scenario", "population"),
                        engine = "asreml")
+fitMods2c <- fitModels(TP = inTP2[10:10], trait = "LA_Estimated",
+                       geno.decomp = c("Scenario", "population"),
+                       engine = "asreml", spatial = TRUE)
+
 
 ## This crashes:
-fitMods2c <- fitModels(TP = inTP2[1:3], trait = "LA_Estimated",
+fitMods2crash <- fitModels(TP = inTP2[1:3], trait = "LA_Estimated",
                        geno.decomp = "Scenario", covariates = "population")
 
 genoPreds2 <- getGenoPred(fitMods2, outFile = "BLUPs_ZA17_LeafArea.csv")
-<<<<<<< HEAD
-genoBLUPs2 <- getBLUPsGeno(fitMods2)
-=======
 genoPreds2b <- getGenoPred(fitMods2b)
->>>>>>> fffa8f8... Added prediction for asreml to predGeno (#6)
+genoPreds2c <- getGenoPred(fitMods2c)
+
+genoBLUPs2 <- getBLUPsGeno(fitMods2)
+rowPreds2c <- getRowPred(fitMods2c)
+
 spatCorr2 <- getCorrected(fitMods2, outFile = "Corrected_ZA17_LeafArea.csv")
 variance2 <- getVar(fitMods2)
 h22 <- getHerit(fitMods2)