Skip to content

Commit

Permalink
rm outliers
Browse files Browse the repository at this point in the history
  • Loading branch information
Flavjack committed Jul 5, 2024
1 parent 0ed6afd commit 2eb004a
Show file tree
Hide file tree
Showing 6 changed files with 182 additions and 2 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ Imports:
DT
Suggests:
gsheet,
cowplot,
knitr,
rmarkdown,
bookdown
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ export(outliers_remove)
export(plot_diag)
export(plot_raw)
export(plot_smr)
export(remove_outliers)
export(split_folder)
export(table2qmd)
export(table2rmd)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@

# inti 0.6.6

- Package
- Update function to remove outliers => "`remove_outliers`"

- Rticles
- Fix Tables and Figures order in final document


# inti 0.6.5

- Package
Expand Down
4 changes: 2 additions & 2 deletions R/plot_diag.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ plot_diag <- function( model, title = NA) {

# Histogram
p1 <- ggplot(dt , aes(.data$.resid)) +
geom_histogram(aes_string(y = "..density..", fill = "..count.."),
geom_histogram(aes(y = after_stat(density)),
color = "black", fill = "grey82", bins = 30) +
stat_function(fun = stats::dnorm, color = "blue", args = list(mean = 0, sd = sd(dt$.resid))) +
theme_bw() +
Expand Down Expand Up @@ -96,7 +96,7 @@ plot_diag <- function( model, title = NA) {
, subtitle = "Homoscedasticity"
)

p5 <-ggplot(dt, aes(stats::hatvalues(model), .data$.resid)) +
p5 <- ggplot(dt, aes(stats::hatvalues(model), .data$.resid)) +
stat_smooth(method="loess", na.rm=TRUE) +
xlab("Leverage")+ylab("Standardized Residuals") +
ggtitle("Residual vs Leverage Plot") +
Expand Down
127 changes: 127 additions & 0 deletions R/remove_outliers.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
#' Remove outliers
#'
#' Function to remove outliers in MET experiments
#'
#' @param data Experimental design data frame with the factors and traits.
#' @param trait Name of the trait.
#' @param model The fixed or random effects in the model.
#' @param drop_na drop NA values from the data.frame
#'
#' @description
#'
#' Use the method M4 in Bernal Vasquez (2016). Bonferroni Holm test to judge
#' residuals standardized by the re scaled MAD (BH MADR).
#'
#' @return list. 1. Table with date without outliers. 2. The outliers in the
#' dataset.
#'
#' @references
#'
#' Bernal Vasquez, Angela Maria, et al. “Outlier Detection Methods for
#' Generalized Lattices: A Case Study on the Transition from ANOVA to REML.”
#' Theoretical and Applied Genetics, vol. 129, no. 4, Apr. 2016.
#'
#' @importFrom stats median pnorm residuals p.adjust
#' @importFrom lme4 lmer
#'
#' @export
#'
#' @examples
#'
#' library(inti)
#'
#' rmout <- remove_outliers(
#' data = potato
#' , formula = stemdw ~ 0 + (1|bloque) + treat*geno + geno
#' , drop_na = FALSE
#' , plot_diag = TRUE
#' )
#'
#' rmout
#'

remove_outliers <- function(data
, formula
, drop_na = TRUE
, plot_diag = FALSE
) {

# data = potato; drop_na = TRUE; plot_diag = TRUE
# formula = stemdw ~ 0 + (1|bloque) + treat*geno+ geno

out_flag <- bholm <- NULL

factors <- all.vars(formula)
trait <- factors[1]
mdfct <- factors[-1]

model <- lme4::lmer(formula, data)

# re-scaled MAD
resi <- cbind(residuals(model, type = "response"))
medi <- median(resi, na.rm = TRUE)
MAD <- median((abs(resi-medi)), na.rm = TRUE)
re_MAD <- MAD*1.4826
# end

# MAD standardized residuals
res_MAD <- resi/re_MAD
# end

# Calculate adjusted p-values
rawp.BHStud <- 2 * (1 - pnorm(abs(res_MAD)))

newdt <- data %>%
select({{factors}}) %>%
relocate({{trait}}, .after = last_col()) %>%
drop_na() %>% # fix model test bug?
cbind.data.frame(., resi, res_MAD, rawp.BHStud)

# Produce a Bonferroni-Holm tests for the adjusted p-values

test.BHStud <- p.adjust(rawp.BHStud, method = "holm")

BHStud_test <- tibble(adjp = rawp.BHStud
, bholm = test.BHStud
) %>%
rownames_to_column("index") %>%
mutate(out_flag = ifelse(bholm <0.05, "OUTLIER", "."))

outliers <- cbind(newdt, BHStud_test) %>%
dplyr::filter(out_flag %in% "OUTLIER")

nwdt <- cbind(newdt, BHStud_test) %>%
mutate({{trait}} := case_when(
!out_flag %in% "OUTLIER" ~ as.character(.data[[trait]])
, TRUE ~ NA_character_
)) %>%
mutate(across({{trait}}, as.numeric)) %>%
{if (isTRUE(drop_na)) {drop_na(data = ., any_of({{trait}}))} else {.}} %>%
select({{factors}}) %>%
relocate({{trait}}, .after = last_col())

diag <- if(plot_diag == TRUE) {

mdi <- model %>%
plot_diag() %>%
cowplot::plot_grid(plotlist = ., nrow = 1
, labels = "Row Data")

mdf <- lme4::lmer(formula, nwdt) %>%
plot_diag() %>%
cowplot::plot_grid(plotlist = ., nrow = 1
, labels = "Clean Data")

list(mdi, mdf) %>%
cowplot::plot_grid(plotlist = ., nrow = 2)

} else { NULL }


list(
data = nwdt
, outliers = outliers
, plot_diag = diag
)

}
47 changes: 47 additions & 0 deletions man/remove_outliers.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 2eb004a

Please sign in to comment.