up

microbiome · Nov 15, 2024 · aa5c258 · aa5c258
1 parent b53151f
commit aa5c258
Show file tree

Hide file tree

Showing 7 changed files with 54 additions and 56 deletions.
diff --git a/R/agglomerate.R b/R/agglomerate.R
@@ -50,7 +50,7 @@
 #'   regarded as empty. (Default: \code{c(NA, "", " ", "\t")}). They will be
 #'   removed if \code{na.rm = TRUE} before agglomeration.
 #'   
-#' @param empty.rows.rm \code{Logical scalar}. Defines whether rows including
+#' @param empty.rm \code{Logical scalar}. Defines whether rows including
 #' \code{empty.fields} in specified \code{rank} will be excluded.
 #' (Default: \code{TRUE})
 #'
@@ -69,7 +69,7 @@
 #'        whether to remove those columns of rowData that include only NAs after
 #'        agglomeration. (Default: \code{FALSE})
 #'        
-#'        \item \code{group.rm}: \code{Logical scalar}. Determines
+#'        \item \code{empty.rm}: \code{Logical scalar}. Determines
 #'        whether to remove rows that do not belong to any group, i.e., that
 #'        have \code{NA} value. (Default: \code{FALSE})
 #'        
@@ -160,9 +160,9 @@
 #' tse <- agglomerateByRank(tse, rank = "Genus")
 #' tse <- transformAssay(tse, method = "pa")
 #'
-#' # removing empty labels by setting empty.rows.rm = TRUE
+#' # removing empty labels by setting empty.rm = TRUE
 #' sum(is.na(rowData(GlobalPatterns)$Family))
-#' x3 <- agglomerateByRank(GlobalPatterns, rank="Family", empty.rows.rm = TRUE)
+#' x3 <- agglomerateByRank(GlobalPatterns, rank="Family", empty.rm = TRUE)
 #' nrow(x3) # different from x2
 #'
 #' # Because all the rownames are from the same rank, rownames do not include
@@ -285,7 +285,7 @@ setMethod(
 #' @importFrom SummarizedExperiment rowData rowData<-
 #' @export
 setMethod("agglomerateByRank", signature = c(x = "SummarizedExperiment"),
-    function(x, rank = taxonomyRanks(x)[1], empty.rows.rm = TRUE,
+    function(x, rank = taxonomyRanks(x)[1], empty.rm = TRUE,
         empty.fields = c(NA, "", " ", "\t", "-", "_"), ...){
         # Input check
         if(nrow(x) == 0L){
@@ -296,8 +296,8 @@ setMethod("agglomerateByRank", signature = c(x = "SummarizedExperiment"),
             stop("'rank' must be a non-empty single character value",
                 call. = FALSE)
         }
-        if(!.is_a_bool(empty.rows.rm)){
-            stop("'empty.rows.rm' must be TRUE or FALSE.", call. = FALSE)
+        if(!.is_a_bool(empty.rm)){
+            stop("'empty.rm' must be TRUE or FALSE.", call. = FALSE)
         }
         if(ncol(rowData(x)) == 0L){
             stop("taxonomyData needs to be populated.", call. = FALSE)
@@ -311,10 +311,10 @@ setMethod("agglomerateByRank", signature = c(x = "SummarizedExperiment"),
         # Get the indices of detected rank columns from rowData
         tax_cols <- .get_tax_cols_from_se(x)
 
-        # if empty.rows.rm is TRUE, remove those rows that have empty,
+        # if empty.rm is TRUE, remove those rows that have empty,
         # white-space, NA values in rank information. I.e., they do not have
         # taxonomy information in specified taxonomy level.
-        if( empty.rows.rm ){
+        if( empty.rm ){
             x <- .remove_with_empty_taxonomic_info(
                 x, tax_cols[col_idx], empty.fields)
         }
@@ -329,16 +329,14 @@ setMethod("agglomerateByRank", signature = c(x = "SummarizedExperiment"),
         # Get groups of taxonomy entries, i.e., get the specified rank
         # column from rowData
         tax_factors <- .get_tax_groups(x, col = col_idx, ...)
-        # Convert to factors. Use group.rm so that NA values are not
+        # Convert to factors. Use empty.rm so that NA values are not
         # preserved. i.e. they are not converted into character values.
         # NA values are handled earlier in this function.
-        tax_factors <- .norm_f(nrow(x), tax_factors, group.rm = TRUE)
+        tax_factors <- .norm_f(nrow(x), tax_factors, empty.rm = TRUE)
 
         # Agglomerate data by utilizing agglomerateByVariable
         args <- c(list(
-            x, by = "rows", group = tax_factors, group.rm = TRUE),
-            list(...)[ !names(list(...)) %in% c("group.rm") ]
-        )
+            x, by = "rows", group = tax_factors, empty.rm = TRUE), list(...))
         x <- do.call(agglomerateByVariable, args)
 
         # Replace the values to the right of the rank with NA_character_.

diff --git a/R/merge.R b/R/merge.R
@@ -1,9 +1,9 @@
 # This function can be used to unify the group id vector. It can be any
 # kind of vector, but this converts it to factor.
 .norm_f <- function(
-        i, f, dim.type = c("rows","columns"), group.rm = FALSE, ...){
-    if(!.is_a_bool(group.rm)){
-        stop("'group.rm' must be TRUE or FALSE.", call. = FALSE)
+        i, f, dim.type = c("rows","columns"), empty.rm = FALSE, ...){
+    if(!.is_a_bool(empty.rm)){
+        stop("'empty.rm' must be TRUE or FALSE.", call. = FALSE)
     }
     dim.type <- match.arg(dim.type)
     if(!is.character(f) && !is.factor(f)){
@@ -16,7 +16,7 @@
             call. = FALSE)
     }
     # This is done otherwise we lose NA values
-    if( !group.rm && any(is.na(f)) ){
+    if( !empty.rm && any(is.na(f)) ){
         f <- as.character(f)
         f[ is.na(f) ] <- "NA"
     }

diff --git a/man/agglomerate-methods.Rd b/man/agglomerate-methods.Rd
diff --git a/tests/testthat/test-3agglomerate.R b/tests/testthat/test-3agglomerate.R
@@ -27,27 +27,27 @@ test_that("agglomerate", {
     expect_equal(assays(actual)$mat[2,1],c(b = 36))
     expect_equal(assays(actual)$mat[3,1],c(c = 24))
     #
-    expect_error(agglomerateByRank(xtse,"",empty.rows.rm=FALSE),
+    expect_error(agglomerateByRank(xtse,"",empty.rm=FALSE),
                  "'rank' must be a non-empty single character value")
-    expect_error(agglomerateByRank(xtse,"Family",empty.rows.rm=""),
-                 "'empty.rows.rm' must be TRUE or FALSE")
+    expect_error(agglomerateByRank(xtse,"Family",empty.rm=""),
+                 "'empty.rm' must be TRUE or FALSE")
     expect_error(
-        agglomerateByRank(xtse,"Family",empty.rows.rm=FALSE,update.tree=""),
+        agglomerateByRank(xtse,"Family",empty.rm=FALSE,update.tree=""),
         "'update.tree' must be TRUE or FALSE")
     xtse2 <- xtse
     rowData(xtse2) <- NULL
-    expect_error(agglomerateByRank(xtse2,"Family",empty.rows.rm=FALSE),
+    expect_error(agglomerateByRank(xtse2,"Family",empty.rm=FALSE),
                  "taxonomyData needs to be populated")
     #
-    actual <- agglomerateByRank(xtse,"Family",empty.rows.rm=FALSE)
+    actual <- agglomerateByRank(xtse,"Family",empty.rm=FALSE)
     expect_equivalent(rowData(actual),rowData(actual_family))
-    actual <- agglomerateByRank(xtse,"Phylum",empty.rows.rm=FALSE)
+    actual <- agglomerateByRank(xtse,"Phylum",empty.rm=FALSE)
     expect_equivalent(rowData(actual),rowData(actual_phylum))
     #
-    actual <- agglomerateByRank(xtse,"Family", ignore.taxonomy = FALSE, empty.rows.rm = TRUE)
+    actual <- agglomerateByRank(xtse,"Family", ignore.taxonomy = FALSE, empty.rm = TRUE)
     expect_equal(dim(actual),c(6,10))
     expect_equal(rowData(actual)$Family,c("c","d","e","f","g","h"))
-    actual <- agglomerateByRank(xtse,"Family", ignore.taxonomy = FALSE, empty.rows.rm = FALSE)
+    actual <- agglomerateByRank(xtse,"Family", ignore.taxonomy = FALSE, empty.rm = FALSE)
     expect_equal(dim(actual),c(8,10))
     expect_equal(rowData(actual)$Family,c("c","d","e","f","g","h",NA,NA))
     actual <- agglomerateByRank(xtse,"Phylum")
@@ -63,22 +63,22 @@ test_that("agglomerate", {
     data(enterotype, package="mia")
     expect_equal(length(unique(rowData(enterotype)[,"Genus"])),
                  nrow(agglomerateByRank(enterotype,"Genus", ignore.taxonomy = FALSE, 
-                 empty.rows.rm = FALSE)))
+                 empty.rm = FALSE)))
 
     # agglomeration in all its forms
     data(GlobalPatterns, package="mia")
     se <- GlobalPatterns
     actual <- agglomerateByRank(se, rank = "Family", 
-        ignore.taxonomy = FALSE, empty.rows.rm = FALSE)
+        ignore.taxonomy = FALSE, empty.rm = FALSE)
     expect_equal(dim(actual),c(603,26))
     expect_equal(length(rowTree(actual)$tip.label),
                  length(rowTree(se)$tip.label))
     actual <- agglomerateByRank(se, rank = "Family", 
-        ignore.taxonomy = FALSE, empty.rows.rm = FALSE, update.tree = TRUE)
+        ignore.taxonomy = FALSE, empty.rm = FALSE, update.tree = TRUE)
     expect_equal(dim(actual),c(603,26))
     expect_equal(length(rowTree(actual)$tip.label), 603)
     actual <- agglomerateByRank(se, rank = "Family", 
-        ignore.taxonomy = FALSE, empty.rows.rm = FALSE, update.tree = TRUE)
+        ignore.taxonomy = FALSE, empty.rm = FALSE, update.tree = TRUE)
     expect_equal(dim(actual),c(603,26))
     expect_equal(length(rowTree(actual)$tip.label), nrow(actual))
     # Test that warning occurs when assay contian binary or negative values
@@ -92,30 +92,30 @@ test_that("agglomerate", {
     data(GlobalPatterns, package="mia")
     tse <- GlobalPatterns
 
-    # Check that empty.rows.rm works
+    # Check that empty.rm works
     # Get all phyla
     all_phyla <- unique( rowData(tse)$Phylum )
 
-    # When empty.rows.rm = FALSE, then phyla should also include NA --> one extra row
-    test0 <- agglomerateByVariable(tse, by = 1, group = "Phylum", group.rm = FALSE)
-    test1 <- agglomerateByRank(tse, rank = "Phylum", empty.rows.rm = FALSE)
+    # When empty.rm = FALSE, then phyla should also include NA --> one extra row
+    test0 <- agglomerateByVariable(tse, by = 1, group = "Phylum", empty.rm = FALSE)
+    test1 <- agglomerateByRank(tse, rank = "Phylum", empty.rm = FALSE)
 
     # Test that dimentionality is the same for merging object by agglomerateByRank
     # and agglomerateByVariable.
     expect_equal(nrow(test0), length(all_phyla))
     expect_equal(nrow(test1), length(all_phyla))
 
-    # When empty.rows.rm = TRUE, there should be as many rows as there are non-NA phyla
-    test0 <- agglomerateByVariable(tse, by = 1, group = "Phylum", group.rm = TRUE)
-    test1 <- agglomerateByRank(tse, rank = "Phylum", empty.rows.rm = TRUE)
+    # When empty.rm = TRUE, there should be as many rows as there are non-NA phyla
+    test0 <- agglomerateByVariable(tse, by = 1, group = "Phylum", empty.rm = TRUE)
+    test1 <- agglomerateByRank(tse, rank = "Phylum", empty.rm = TRUE)
 
     # Test that dimensionality is the same when NA values are removed.
     expect_equal(nrow(test0), length( all_phyla[!is.na(all_phyla)] ))
     expect_equal(nrow(test1), length( all_phyla[!is.na(all_phyla)] ))
 
     # Check that there are more taxa when agglomeration is to "Species" level
-    test0 <- agglomerateByVariable(tse, by = 1, group = "Species", group.rm = FALSE)
-    test1 <- agglomerateByRank(tse, rank = "Species", empty.rows.rm = FALSE)
+    test0 <- agglomerateByVariable(tse, by = 1, group = "Species", empty.rm = FALSE)
+    test1 <- agglomerateByRank(tse, rank = "Species", empty.rm = FALSE)
     expect_equal(nrow(test0), 945)
     expect_equal(nrow(test1), 2307)
 
@@ -143,9 +143,9 @@ test_that("agglomerate", {
     expect_equal(rd1[, cols], rd2[, cols])
     expect_true( ncol(rd1) > ncol(rd2) )
     # Test that make.unique work
-    uniq <- agglomerateByRank(tse, rank = "Species", empty.rows.rm = FALSE)
+    uniq <- agglomerateByRank(tse, rank = "Species", empty.rm = FALSE)
     not_uniq <- agglomerateByRank(
-        tse, rank = "Species", make.unique = FALSE, empty.rows.rm = FALSE)
+        tse, rank = "Species", make.unique = FALSE, empty.rm = FALSE)
     expect_true( !any( duplicated(rownames(uniq)) ) )
     expect_true( any( duplicated(rownames(not_uniq)) ) )
 

diff --git a/tests/testthat/test-5dominantTaxa.R b/tests/testthat/test-5dominantTaxa.R
@@ -30,15 +30,15 @@ test_that("getDominant", {
                           "Order:Stramenopiles","Order:Stramenopiles","Order:Stramenopiles")
         names(exp.vals.two) <- exp.names.one
         expect_equal(
-            getDominant(tse, rank = "Genus", ignore.taxonomy = FALSE, empty.rows.rm = FALSE)[1:15],
+            getDominant(tse, rank = "Genus", ignore.taxonomy = FALSE, empty.rm = FALSE)[1:15],
             exp.vals.two)
 
         # Check if DominantTaxa is added to coldata
         expect_equal(
             colData(addDominant(tse, name="dominant"))$dominant[1:15],
             exp.vals.one)
         expect_equal(
-            colData(addDominant(tse,rank = "Genus", empty.rows.rm = FALSE, name="dominant"))$dominant[1:15],
+            colData(addDominant(tse,rank = "Genus", empty.rm = FALSE, name="dominant"))$dominant[1:15],
             exp.vals.two)
 
         # Check if DominantTaxa is added when factor is passed
@@ -52,7 +52,7 @@ test_that("getDominant", {
         test <- tse
         rowData(test)$group <- rowData(tse)$Genus
         expect_equal(
-            colData(addDominant(test, rank = "group", group.rm = TRUE, name = "dominant"))$dominant[1:15],
+            colData(addDominant(test, rank = "group", empty.rm = TRUE, name = "dominant"))$dominant[1:15],
             exp.vals.three)
 
         tse1 <- tse

diff --git a/tests/testthat/test-5prevalence.R b/tests/testthat/test-5prevalence.R
@@ -106,16 +106,16 @@ test_that("getPrevalence", {
     remove <- c(15, 200)
     assay(tse, "counts")[remove, ] <- NA
     # Check that agglomeration works
-    tse_agg <- agglomerateByRank(tse, ignore.taxonomy = FALSE, empty.rows.rm = FALSE, rank = rank)
+    tse_agg <- agglomerateByRank(tse, ignore.taxonomy = FALSE, empty.rm = FALSE, rank = rank)
     expect_warning(ref <- getPrevalence(tse_agg, na.rm = FALSE))
-    expect_warning(res <- getPrevalence(tse, rank = "Genus", empty.rows.rm = FALSE))
+    expect_warning(res <- getPrevalence(tse, rank = "Genus", empty.rm = FALSE))
     expect_true( all(res == ref, na.rm = TRUE) )
     #
     tse_agg <- agglomerateByRank(
-        tse, ignore.taxonomy = FALSE, empty.rows.rm = TRUE, rank = rank)
+        tse, ignore.taxonomy = FALSE, empty.rm = TRUE, rank = rank)
     ref <- getPrevalence(tse_agg, na.rm = TRUE)
     res <- getPrevalence(
-        tse, na.rm = TRUE, rank = "Genus", empty.rows.rm = TRUE)
+        tse, na.rm = TRUE, rank = "Genus", empty.rm = TRUE)
     expect_true( all(res == ref, na.rm = TRUE) )
 })
 

diff --git a/vignettes/mia.Rmd b/vignettes/mia.Rmd
@@ -137,13 +137,13 @@ becomes very easy.
 altExp(tse, "family") <- x2
 ```
 
-Keep in mind, that if you set `empty.rows.rm = TRUE`, rows with `NA` or similar value
+Keep in mind, that if you set `empty.rm = TRUE`, rows with `NA` or similar value
 (defined via the `empty.fields` argument) will be removed. Depending on these
 settings different number of rows will be returned.
 
 ```{r}
-x1 <- agglomerateByRank(tse, rank = "Species", empty.rows.rm = TRUE)
-altExp(tse,"species") <- agglomerateByRank(tse, rank = "Species", empty.rows.rm = FALSE)
+x1 <- agglomerateByRank(tse, rank = "Species", empty.rm = TRUE)
+altExp(tse,"species") <- agglomerateByRank(tse, rank = "Species", empty.rm = FALSE)
 dim(x1)
 dim(altExp(tse,"species"))
 ```