From 94696fa7e78a4bba367fafd87f2606e636f0fabb Mon Sep 17 00:00:00 2001 From: Tuomas Borman <60338854+TuomasBorman@users.noreply.github.com> Date: Fri, 1 Nov 2024 11:08:16 +0200 Subject: [PATCH] Improve plotAbundance (#156) Signed-off-by: Daena Rys Co-authored-by: Daena Rys Co-authored-by: Muluh <127390183+Daenarys8@users.noreply.github.com> --- DESCRIPTION | 54 +- NAMESPACE | 7 +- NEWS | 5 +- R/plotAbundance.R | 895 +++++++++++++++------------ man/plotAbundance.Rd | 107 ++-- tests/testthat/test-2plotAbundance.R | 140 +++-- 6 files changed, 651 insertions(+), 557 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1b645e8..3987868 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: miaViz Title: Microbiome Analysis Plotting and Visualization -Version: 1.15.1 +Version: 1.15.2 Authors@R: c(person(given = "Tuomas", family = "Borman", role = c("aut", "cre"), email = "tuomas.v.borman@utu.fi", @@ -33,44 +33,46 @@ Encoding: UTF-8 LazyData: false Depends: R (>= 4.0), - SummarizedExperiment, - TreeSummarizedExperiment, - mia (>= 1.13.0), ggplot2, - ggraph (>= 2.0) + ggraph (>= 2.0), + mia (>= 1.13.0), + SummarizedExperiment, + TreeSummarizedExperiment Imports: - methods, - stats, - S4Vectors, + ape, BiocGenerics, BiocParallel, DelayedArray, - scater, - ggtree, + DirichletMultinomial, + dplyr, ggnewscale, - viridis, + ggrepel, + ggplot2, + ggraph, + ggtree, + methods, + rlang, + scater, + S4Vectors, + SingleCellExperiment, + stats, tibble, - tidytext, - tidytree, tidygraph, - rlang, - purrr, tidyr, - dplyr, - ape, - DirichletMultinomial, - ggrepel, - SingleCellExperiment + tidytext, + tidytree, + viridis Suggests: - knitr, - rmarkdown, BiocStyle, - testthat, - patchwork, - vegan, bluster, + circlize, ComplexHeatmap, - circlize + ggh4x, + knitr, + patchwork, + rmarkdown, + testthat, + vegan Remotes: github::microbiome/miaTime Roxygen: list(markdown = TRUE) diff --git a/NAMESPACE b/NAMESPACE index a0536b4..3732072 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -68,8 +68,11 @@ importFrom(ape,keep.tip) importFrom(ape,rotateConstr) importFrom(dplyr,"%>%") importFrom(dplyr,all_of) +importFrom(dplyr,arrange) importFrom(dplyr,bind_cols) importFrom(dplyr,case_when) +importFrom(dplyr,desc) +importFrom(dplyr,distinct) importFrom(dplyr,filter) importFrom(dplyr,group_by) importFrom(dplyr,last_col) @@ -78,6 +81,7 @@ importFrom(dplyr,n) importFrom(dplyr,pull) importFrom(dplyr,relocate) importFrom(dplyr,rename) +importFrom(dplyr,row_number) importFrom(dplyr,select) importFrom(dplyr,summarise) importFrom(dplyr,summarize) @@ -129,10 +133,10 @@ importFrom(mia,meltSE) importFrom(rlang,"!!") importFrom(rlang,":=") importFrom(rlang,sym) -importFrom(scater,plotExpression) importFrom(scater,plotReducedDim) importFrom(scater,retrieveCellInfo) importFrom(scater,retrieveFeatureInfo) +importFrom(stats,formula) importFrom(stats,median) importFrom(stats,sd) importFrom(tibble,rownames_to_column) @@ -140,6 +144,7 @@ importFrom(tibble,tibble) importFrom(tidygraph,activate) importFrom(tidygraph,as_tbl_graph) importFrom(tidygraph,as_tibble) +importFrom(tidyr,complete) importFrom(tidyr,drop_na) importFrom(tidyr,pivot_longer) importFrom(tidytext,reorder_within) diff --git a/NEWS b/NEWS index 906cfeb..36e02f4 100644 --- a/NEWS +++ b/NEWS @@ -32,4 +32,7 @@ Changes in version 1.13.x + Added getNeatOrder function + plotAbundance: enable plotting without agglomeration + Change parameter naming convention from parameter_name to parameter.name -+ Added plotLoadings function \ No newline at end of file ++ Added plotLoadings function + +Changes in version 1.15.x ++ plotAbundance: Improved visualization of sample metadata diff --git a/R/plotAbundance.R b/R/plotAbundance.R index ac46cb8..6cfe8d9 100644 --- a/R/plotAbundance.R +++ b/R/plotAbundance.R @@ -3,93 +3,91 @@ #' \code{plotAbundance()} creates a barplot of feature abundances, typically #' used to visualize the relative abundance of features at a specific taxonomy #' rank. -#' -#' It is recommended to handle subsetting, agglomeration, and transformation +#' +#' It is recommended to handle subsetting, agglomeration, and transformation #' outside this function. However, agglomeration and relative transformation -#' can be applied using the \code{group} and \code{as.relative} parameters, +#' can be applied using the \code{group} and \code{as.relative} parameters, #' respectively. If one of the \code{TAXONOMY_RANKS} is selected via #' \code{group}, \code{mia::agglomerateByRank()} is used, otherwise #' \code{agglomerateByVariable()} is applied. #' -#' -#' #' @param x a #' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} #' object. #' #' @param assay.type \code{Character scalar} value defining which assay data to #' use. (Default: \code{"relabundance"}) -#' +#' #' @param assay_name Deprecate. Use \code{assay.type} instead. -#' -#' @param col.var \code{Character scalar}. Selects a column from -#' \code{colData} to be plotted below the abundance plot. -#' Continuous numeric values will be plotted as point, whereas factors and -#' character will be plotted as colour-code bar. (Default: \code{NULL}) -#' -#' @param features Deprecated. Use \code{col.var} instead. -#' -#' @param order.row.by \code{Character scalar}. How to order abundance value: -#' By name (\code{"name"}) -#' for sorting the taxonomic labels alphabetically, by abundance -#' (\code{"abund"}) to sort by abundance values or by a reverse order of -#' abundance values (\code{"revabund"}). (Default: \code{"name"}) -#' -#' @param order_rank_by Deprecated. Use \code{order.row.by} instead. -#' -#' @param order.col.by \code{Character scalar}. from the chosen rank of -#' abundance data or from \code{colData} to select values to order the abundance -#' plot by. (Default: \code{NULL}) -#' -#' @param order_sample_by Deprecated. Use \code{order.col.by} instead. -#' -#' @param decreasing \code{Logical scalar}. If the \code{order.col.by} -#' is defined and the -#' values are numeric, should the values used to order in decreasing or -#' increasing fashion? (Default: \code{FALSE}) #' #' @param layout \code{Character scalar}. Either \dQuote{bar} or \dQuote{point}. -#' -#' @param one.facet \code{Logical scalar}. Should the plot be returned in on -#' facet or split into -#' different facet, one facet per different value detect in \code{group}. If -#' \code{col.var} or \code{order.col.by} is not \code{NULL}, this setting will -#' be disregarded. (Default: \code{TRUE}) -#' -#' @param one_facet Deprecated. Use \code{one.facet} instead. -#' -#' @param ncol \code{Numeric scalar}. if \code{one.facet = FALSE}, -#' \code{ncol} defines many -#' columns should be for plotting the different facets. (Default: \code{2}) -#' -#' @param scales \code{Character scalar}. Defines the behavior of the scales -#' of each facet. Both values are -#' passed onto \code{\link[ggplot2:facet_wrap]{facet_wrap}}. -#' (Default: \code{"fixed"}) -#' +#' #' @param ... additional parameters for plotting. #' \itemize{ -#' \item \code{group} \code{Character scalar}. Specifies the group for +#' \item \code{group}: \code{Character scalar}. Specifies the group for #' agglomeration. Must be a value from \code{colnames(rowData(x))}. If #' \code{NULL}, agglomeration is not applied. (Default: \code{NULL}) -#' -#' \item \code{as.relative} \code{Character scalar}. Should the relative +#' +#' \item \code{as.relative}: \code{Character scalar}. Should the relative #' values be calculated? (Default: \code{FALSE}) +#' +#' \item \code{col.var}: \code{Character scalar}. Selects a column from +#' \code{colData} to be plotted below the abundance plot. +#' Continuous numeric values will be plotted as point, whereas factors and +#' character will be plotted as colour-code bar. (Default: \code{NULL}) +#' +#' \item \code{order.row.by}: \code{Character scalar}. How to order abundance +#' value. By name (\code{"name"}) for sorting the taxonomic labels +#' alphabetically, by abundance (\code{"abund"}) to sort by abundance +#' values or by a reverse order of +#' abundance values (\code{"revabund"}). (Default: \code{"name"}) +#' +#' \item \code{row.levels}: \code{Character vector}. Specifies order of rows +#' in a plot. Can be combined with \code{order.row.by} to control order +#' of only certain rows. If \code{NULL}, the order follows +#' \code{order.row.by}. (Default: \code{NULL}) +#' +#' \item \code{order.col.by}: \code{Character scalar}. from the chosen rank of +#' abundance data or from \code{colData} to select values to order the +#' abundance plot by. (Default: \code{NULL}) +#' +#' \item \code{col.levels}: \code{Character vector}. Specifies order of +#' columns in a plot. Can be combined with \code{order.col.by} to control +#' order of only certain columns. If \code{NULL}, the order follows +#' \code{order.col.by}. (Default: \code{NULL}) +#' +#' \item \code{decreasing}: \code{Logical scalar}. If the \code{order.col.by} +#' is defined and the values are numeric, should the values used to order in +#' decreasing or increasing fashion? (Default: \code{FALSE}) +#' +#' \item \code{facet.rows}: \code{Logical scalar}. Should the rows in the +#' plot be spitted into facets? (Default: \code{FALSE}) +#' +#' \item \code{facet.cols}: \code{Logical scalar}. Should the columns in the +#' plot be spitted into facets? (Default: \code{FALSE}) +#' +#' \item \code{ncol}: \code{Numeric scalar}. if facets are applied, +#' \code{ncol} defines many columns should be for plotting the different +#' facets. (Default: \code{2}) +#' +#' \item \code{scales} \code{Character scalar}. Defines the behavior of the +#' scales of each facet. The value is passed into +#' \code{\link[ggplot2:facet_wrap]{facet_wrap}}. (Default: \code{"fixed"}) #' } #' See \code{\link{mia-plot-args}} for more details i.e. call #' \code{help("mia-plot-args")} #' -#' @return +#' @return #' a \code{\link[ggplot2:ggplot]{ggplot}} object or list of two #' \code{\link[ggplot2:ggplot]{ggplot}} objects, if `col.var` are added to -#' the plot. +#' the plot. #' #' @name plotAbundance #' #' @examples #' data(GlobalPatterns, package="mia") #' tse <- GlobalPatterns -#' +#' #' # If rank is set to NULL (default), agglomeration is not done. However, note #' # that there is maximum number of rows that can be plotted. That is why #' # we take sample from the data. @@ -99,29 +97,29 @@ #' # Apply relative transformation #' tse_sub <- transformAssay(tse_sub, method = "relabundance") #' plotAbundance(tse_sub, assay.type = "relabundance") -#' +#' #' # Plotting counts using the first taxonomic rank as default #' plotAbundance( #' tse, assay.type="counts", group = "Phylum") + #' labs(y="Counts") -#' +#' #' # Using "Phylum" as rank. Apply relative transformation to "counts" assay. #' plotAbundance( #' tse, assay.type="counts", group = "Phylum", add_legend = FALSE, #' as.relative = TRUE) -#' +#' #' # Apply relative transform #' tse <- transformAssay(tse, method = "relabundance") -#' +#' #' # A feature from colData or taxon from chosen rank can be used for ordering #' # samples. #' plotAbundance( #' tse, assay.type="relabundance", group = "Phylum", #' order.col.by = "Bacteroidetes") -#' +#' #' # col.var from colData can be plotted together with abundance plot. #' # Returned object is a list that includes two plot; other visualizes -#' ## abundance other col.var. +#' ## abundance other col.var. #' plot <- plotAbundance( #' tse, assay.type = "relabundance", group = "Phylum", #' col.var = "SampleType") @@ -129,33 +127,31 @@ #' # These two plots can be combined with wrap_plots function from patchwork #' # package #' library(patchwork) -#' wrap_plots(plot, ncol = 1) +#' wrap_plots(plot, ncol = 1, heights = c(0.95, 0.05)) #' } -#' +#' #' # Same plot as above but showing sample IDs as labels for the x axis on the -#' # top plot -#' plot[[1]] <- plotAbundance( +#' # top plot. Moreover, we use facets. +#' plot <- plotAbundance( #' tse, assay.type = "relabundance", #' group = "Phylum", col.var = "SampleType", add.legend = FALSE, -#' add.x.text = TRUE)[[1]] + -#' theme(axis.text.x = element_text(angle = 90)) -#' \donttest{ -#' wrap_plots(plot, ncol = 1, heights = c(0.8,0.2)) -#' } -#' +#' add.x.text = TRUE, facet.cols = TRUE, scales = "free_x") + +#' theme(axis.text.x = element_text(angle = 90)) +#' plot +#' #' # Compositional barplot with top 5 taxa and samples sorted by #' # "Bacteroidetes" -#' +#' #' # Getting top taxa on a Phylum level #' tse <- transformAssay(tse, method = "relabundance") #' tse_phylum <- agglomerateByRank(tse, rank = "Phylum") #' top_taxa <- getTop(tse_phylum, top = 5, assay.type = "relabundance") -#' +#' #' # Renaming the "Phylum" rank to keep only top taxa and the rest to "Other" #' phylum_renamed <- lapply(rowData(tse)$Phylum, function(x){ #' if (x %in% top_taxa) {x} else {"Other"}}) #' rowData(tse)$Phylum <- as.character(phylum_renamed) -#' +#' #' # Compositional barplot #' plotAbundance( #' tse, assay.type="relabundance", group = "Phylum", @@ -163,124 +159,94 @@ NULL #' @rdname plotAbundance -setGeneric("plotAbundance", signature = c("x"), - function(x, ...) - standardGeneric("plotAbundance")) - -.check_abund_plot_args <- function(one_facet = TRUE, - ncol = 2){ - if(!.is_a_bool(one_facet)){ - stop("'one_facet' must be TRUE or FALSE.", call. = FALSE) - } - if(!is.numeric(ncol) || as.integer(ncol) != ncol || ncol < 1){ - stop("'ncol' must be an integer value above or equal to 1.", - call. = FALSE) - } -} +setGeneric("plotAbundance", signature = c("x"), function(x, ...) + standardGeneric("plotAbundance")) #' @rdname plotAbundance -#' @importFrom scater plotExpression #' @importFrom ggplot2 facet_wrap #' @export -setMethod("plotAbundance", signature = c("SummarizedExperiment"), - function(x, - col.var = features, - features = NULL, - order.row.by = order_rank_by, - order_rank_by = c("name","abund","revabund"), - order.col.by = order_sample_by, - order_sample_by = NULL, - decreasing = TRUE, - layout = c("bar","point"), - one.facet = one_facet, - one_facet = TRUE, - ncol = 2, - scales = "fixed", - assay.type = assay_name, assay_name = "counts", - ...){ - ############################# INPUT CHECK ############################# - if(nrow(x) == 0L){ - stop("No data to plot. nrow(x) == 0L.", call. = FALSE) - } - .check_assay_present(assay.type, x) - .check_for_taxonomic_data_order(x) - layout <- match.arg(layout, c("bar","point")) - order.row.by <- match.arg(order.row.by, c("name","abund","revabund")) - .check_abund_plot_args(one_facet = one.facet, ncol = ncol) - if( !is.null(col.var) ){ - col.var <- match.arg(col.var, colnames(colData(x))) - } - ########################### INPUT CHECK END ########################### - # Get the abundance data to be plotted. Agglomerate and apply relative - # transformation if specified. - abund_data <- .get_abundance_data(x, assay.type, order.row.by, ...) - group <- attr(abund_data, "group") - # Order columns - order_col_by <- .norm_order_sample_by( - order.col.by, unique(abund_data$colour_by), x) - # Get additional column metadata to be plotted - features_data <- NULL - if(!is.null(col.var) || !is.null(order_col_by)){ - features_data <- .get_features_data(col.var, order_col_by, x) - } - # Order the whole data to follow user specified ordering - if(!is.null(order_col_by)){ - order_out <- .order_abund_feature_data( - abund_data, features_data, order_col_by, decreasing) - abund_data <- order_out$abund_data - features_data <- order_out$features_data - } - # Create the main plot - plot_out <- .abund_plotter(abund_data, - colour_by = group, - layout = layout, - ...) - # Create the column metadata plot and create a list from plots - if(!is.null(features_data)){ - plot_feature_out <- .features_plotter( - features_data, order.col.by, ...) - plot_out <- c(list(abundance = plot_out), plot_feature_out) - } else { - # Whether to split the main plot to multiple facets. This is - # disabled if user wants to plot also column metadata. - if (!one.facet) { - plot_out <- plot_out + - facet_wrap(~colour_by, ncol = ncol, scales = scales) - } - } - # Checks if the list is a ggplot object or regular list of ggplot - # objects - if( !is.ggplot(plot_out) ){ - # If features is specified, then only abundance and features plots - # are returned as a list. If it is not, then only abundance plot is - # returned. - if( !is.null(col.var) ){ - plot_out <- list( - abundance = plot_out[["abundance"]], plot_out[[col.var]]) - # Assigns the names back - names(plot_out) <- c("abundance", col.var) - } else{ - plot_out <- plot_out[["abundance"]] - } - } - return(plot_out) +setMethod("plotAbundance", signature = c("SummarizedExperiment"), function( + x, assay.type = assay_name, assay_name = "counts", layout = "bar", ...){ + # + .check_abundance_input(x, assay.type, layout, ...) + # Get the abundance data to be plotted. Agglomerate and apply relative + # transformation if specified. + abund_data <- .get_abundance_data(x, assay.type, ...) + group <- attr(abund_data, "group") + # If the data is paired, ensure that all time points have same sample + # set, i.e., each patient has all the time points. + abund_data <- .add_paired_samples(abund_data, ...) + # Order rows and columns + abund_data <- .order_abundance_rows(abund_data, ...) + abund_data <- .order_abundance_cols(abund_data, ...) + # Create the main plot + plot_out <- .abund_plotter( + abund_data, colour_by = group, layout = layout, ...) + # If user wants to incorporate sample information, add info as an own + # plot or use facets + plot_out <- .abund_plotter_incorporate_metadata(plot_out, abund_data, ...) + return(plot_out) } ) -#' @importFrom dplyr group_by summarize rename -#' @importFrom mia meltSE +################################ HELP FUNCTIONS ################################ +################################################################################ +# Data handlers + +.check_abundance_input <- function( + x, assay.type, layout, order.col.by = order_sample_by, + order_sample_by = NULL, col.var = features, features = NULL, ...){ + # + if( nrow(x) == 0L ){ + stop("No data to plot. nrow(x) == 0L.", call. = FALSE) + } + .check_assay_present(assay.type, x) + if( !(.is_a_string(layout) && layout %in% c("bar", "point")) ){ + stop("'layout' must be 'bar' or 'point'.", call. = FALSE) + } + if( !(is.null(order.col.by) || .is_a_string(order.col.by) ) ){ + stop("'order.col.by' must specify a single character value.", + call. = FALSE) + } + if( !(is.null(col.var) || (is.character(col.var) && + all(col.var %in% colnames(colData(x))))) ){ + stop("'col.var' must specify a column from colData(x).", + call. = FALSE) + } + # Check that all the colnames are unique in colData. The functions assume + # that columns have unique names. Moreover, the data must not have special + # names that we use here in these functions. + not_allowed <- c("X", "Y", "colour_by", assay.type) + if( any(colnames(colData(x)) %in% not_allowed) ){ + stop("colData(x) includes colnames that are not supported. Modify ", + "the names.\nThe following names are not allowed: '", + paste0(not_allowed, collapse = "', '"), "'", call. = FALSE) + } + # Leave order.col.by out of the comparison, if it specifies a feature + order.col.by <- if( !is.null(order.col.by) && order.col.by %in% + colnames(colData(x))) order.col.by else NULL + all_vars <- unique(c(order.col.by, col.var)) + if( sum(colnames(colData(x)) %in% all_vars) != length(all_vars) ){ + stop("colData(x) must have unique colnames.", call. = FALSE) + } + return(NULL) +} + +# This function wrangles the data to long format. It takes care of +# agglomeration and transformation. The outut is a single tibble with all the +# whole dataset for plotting. .get_abundance_data <- function( - x, assay.type, order_rank_by = "name", group = rank, rank = NULL, + x, assay.type, group = rank, rank = NULL, as.relative = use_relative, use_relative = FALSE, ...){ # Input check - if(!.is_a_bool(as.relative)){ - stop("'as.relative' must be TRUE or FALSE.", call. = FALSE) - } if( !(is.null(group) || ( .is_non_empty_string(group) && group %in% colnames(rowData(x)) )) ){ stop("'group' must be specify a name of a column from rowData or ", "NULL.", call. = FALSE) } + if(!.is_a_bool(as.relative)){ + stop("'as.relative' must be TRUE or FALSE.", call. = FALSE) + } # # Agglomerate data if user has specified if (!is.null(group) && group %in% taxonomyRanks(x)) { @@ -315,164 +281,199 @@ setMethod("plotAbundance", signature = c("SummarizedExperiment"), colnames(x) <- paste0("Sample", seq_len(ncol(x))) } # Melt TreeSE - data <- meltSE( - x, assay.type = assay.type, row.name = "colour_by", col.name = "X") + df <- meltSE( + x, assay.type = assay.type, row.name = "colour_by", col.name = "X", + add.col = TRUE) # Add correct column name for abundance values - colnames(data)[ colnames(data) == assay.type ] <- "Y" - # Reorder so that the order follows the order of sample names. The order is - # currently alphabetical. - data$X <- factor(data$X, levels = colnames(x)) - # Order values - if( order_rank_by == "name" ){ - # By default, factors follow alphabetical order. Order values, based - # on names i.e. alphabetical order. - lvl <- levels(data$colour_by) - lvl <- lvl[order(lvl)] - } else if( order_rank_by %in% c("abund","revabund") ){ - # Control the order - decreasing <- ifelse(order_rank_by == "abund",TRUE,FALSE) - # Get values for each feature and sum them up - o <- data %>% - select(!.data$X) %>% - group_by(.data$colour_by) %>% - summarize(sum = sum(.data$Y)) - # Order the data based on abundance. By default, feature that has - # highest library size comes first. - lvl <- o[order(o$sum, decreasing = decreasing), ] %>% - pull(.data$colour_by) %>% - as.character() - } else { - stop(".") - } - # Apply the order - data$colour_by <- factor(data$colour_by, lvl) - data <- data[order(data$colour_by),] + colnames(df)[ colnames(df) == assay.type ] <- "Y" # Add group info to attributes - attr(data, "group") <- ifelse(!is.null(group), group, "Feature") - return(data) + attr(df, "group") <- ifelse(!is.null(group), group, "Feature") + return(df) } -.norm_order_sample_by <- function(order_sample_by, factors, x){ - # If user did not specify the ordering, do nnothing - if(is.null(order_sample_by)){ - return(order_sample_by) - } - # Chec that the parameter is string - msg <- paste0("'order.col.by' must be a single non-empty character value, ", - "either present in the abundance data as group variable or ", - "in the column data of 'x'. (The abundance data takes ", - "precedence)") - if(!.is_non_empty_string(order_sample_by)){ - stop(msg, call. = FALSE) +# The paired option is for plotting the abundances so that we can compare +# time points, i.e., with facets where each row is different time point. +# However, if all the time points do not have all the samples, the patients +# are not aligned correctly (samples from certain patient are below each other). +# This function ensures that all the time points have all the patients so that +# comparison is possible. +#' @importFrom dplyr group_by summarize pull select distinct mutate +#' row_number ungroup +#' @importFrom tidyr complete +.add_paired_samples <- function( + df, paired = FALSE, order.col.by = order_sample_by, + order_sample_by = NULL, col.var = features, features = NULL, ...){ + # + if(!.is_a_bool(paired)){ + stop("'paired' must be TRUE or FALSE.", call. = FALSE) + } + # When paired is specified, order.col.by must be a single variable name from + # colData + if( paired && !(.is_a_string(order.col.by) && + order.col.by %in% colnames(df)) ){ + stop("When 'paired=TRUE', 'order.col.by' must specify single ", + "variable from colData(x).", call. = FALSE) + } + # When paired is specified, also col.data must be a single variable name + # from colData + if( paired && !(all(col.var %in% colnames(df))) ){ + stop("When 'paired=TRUE', 'col.var' must specify single ", + "variable from colData(x).", call. = FALSE) } # - # If the order variable is not found from the coloring values (samples can - # also be order based on abundance of certain taxon), check that the - # variable can be found from colData. - if(!(order_sample_by %in% factors)){ - tmp <- .get_feature_data(x, order_sample_by, msg = msg) - order_sample_by <- tmp$name - } - return(order_sample_by) -} - -# This funtion retrives data from colData without failing (if the variable -# is not found) -.get_feature_data <- function(x, by, msg = NULL){ - # Get variable without failing - tmp <- try({retrieveCellInfo(x, by, search = "colData")}, silent = TRUE) - # Check if fetching was failed - if(is(tmp,"try-error")){ - # If msg is not NULL, fail if variable is not found. - # Otherwise, give NULL. - if( !is.null(msg) ){ - stop(msg, call. = FALSE) - } else{ - tmp <- NULL + # If the data is paired, and some data is missing from the repeated time + # points, add the samples as missing. + # Generate all combinations of sample_type and time_point + if( paired && !is.null(order.col.by) && !is.null(col.var) ){ + # Calculate how many times each patient-time point pair is present. + # They must be only once (or none). If they are multiple times, the + # data is not correctly paired. + num_pairs <- df |> + group_by( + across(all_of(col.var)), .data[[order.col.by]], colour_by) |> + summarize(count = n(), .groups = "drop") |> + pull(count) + if (any(num_pairs > 1)) { + stop("Data appears to contain multiple samples for some ", + "combinations of 'col.var' and 'order.col.by'. Ensure that ", + "each combination corresponds to a unique sample.", + call. = FALSE) } + # Get all the time point / patient combinations for each feature + sample_pairs <- df |> + select(all_of(col.var), all_of(order.col.by), colour_by) |> + distinct() |> + complete(!!!syms(col.var), .data[[order.col.by]], colour_by) + # Join with the original data, filling missing values with NA + df <- sample_pairs |> + dplyr::left_join(df, by = c(order.col.by, col.var, "colour_by")) + # Now we have a dataset that includes all patients for each timepoint. + # Add arbitrary sample names for those samples that were added. + df <- df |> + group_by(colour_by) |> + mutate(X = ifelse(is.na(as.character(X)), + paste0("added_", row_number()), as.character(X))) |> + ungroup() |> + mutate(X = factor(X)) } - return(tmp) + return(df) } -.get_features_data <- function(features, order_sample_by, x){ - # Get variable (this functions fetches data for both odering and feature - # plotting) - features <- unique(c(order_sample_by,features)) - # Get values - features_data <- lapply( - features, .get_feature_data, x = x) - # Get those values that are found - non_empty <- !vapply(features_data, is.null, logical(1)) - features_data <- features_data[non_empty] - if(length(features_data) == 0){ - return(NULL) - } - # Get values and names and create data.frame from them - values <- lapply(features_data, "[[", "value") - names <- lapply(features_data, "[[", "name") - features_data <- data.frame(values) - colnames(features_data) <- names - # Add sample names to rows - if(!is.null(colnames(x))){ - rownames(features_data) <- colnames(x) - } else { - rownames(features_data) <- paste0("Sample",seq_len(ncol(x))) +# This function modifies factor of rows/features to follow the user-specified +# order. +#' @importFrom dplyr group_by summarise arrange desc distinct pull +.order_abundance_rows <- function( + df, order.row.by = order_rank_by, order_rank_by = "name", + row.levels = NULL, order.col.by = order_sample_by, + order_sample_by = NULL, ...){ + # + correct <- .is_a_string(order.row.by) && order.row.by %in% + c("name", "abund", "revabund") + if( !correct ){ + stop("'order.row.by' must be 'name', 'abund' or 'revabund'.", + call. = FALSE) + } + if( !(is.null(row.levels) || is.character(row.levels)) ){ + stop("'row.levels' must include all rows.", call. = FALSE) } - return(features_data) + # The ordering factor must be found from colData or be one of the rows + is_coldata <- .is_a_string(order.col.by) && order.col.by %in% colnames(df) + is_feat <- .is_a_string(order.col.by) && order.col.by %in% df$colour_by + if( !(is.null(order.col.by) || is_coldata || is_feat) ){ + stop("'order.col.by' must be a variable from colData(x) or a name ", + "of a row.", call. = FALSE) + } + # + # If user specified levels to use, we get those levels and combine them with + # rownames so that user do not have to specify all names + if( !is.null(row.levels) ){ + row.levels <- union(row.levels, as.character(df$colour_by)) + } + # Order columns and rows alphabetically by default + if( is.null(row.levels) && order.row.by == "name" ){ + row.levels <- sort(unique(unfactor(df$colour_by))) + } + # Get levels based on abundance + if( is.null(row.levels) && order.row.by %in% c("abund", "revabund") ){ + row.levels <- df |> + group_by(colour_by) |> + summarise(mean_abundance = mean(Y, na.rm = TRUE)) |> + # Either sort based on increasing or decreasing order + arrange(if (order.row.by == "abund") desc(mean_abundance) else + mean_abundance) |> + distinct(colour_by) |> + pull(colour_by) + } + # If user wants to order columns based on abundance of certain taxa, the + # taxa will be added on top of the figure + if( is_feat ){ + row.levels <- unique(c(order.col.by, as.character(row.levels))) + } + # Apply the ordering + df$colour_by <- factor(df$colour_by, levels = row.levels) + return(df) } -#' @importFrom dplyr pull -.order_abund_feature_data <- function( - abund_data, features_data, order_sample_by, decreasing = TRUE){ - # If ordering was specified - if(!is.null(order_sample_by)){ - # Get levels - lvl <- levels(abund_data$X) - # If user specified taxan to be used to order the data - if(is.null(features_data) || - !(order_sample_by %in% colnames(features_data))){ - # Presort by taxon value - lvl_tmp <- levels(abund_data$colour_by) - lvl_tmp <- c( - order_sample_by, lvl_tmp[!(lvl_tmp %in% order_sample_by)]) - abund_data$colour_by <- factor(abund_data$colour_by, lvl_tmp) - # Get the abundance values from certain taxon - data <- abund_data[abund_data$colour_by %in% order_sample_by, ] - data <- data[["Y"]] - } else { - # Otherwise get the order from the variable - data <- features_data[[order_sample_by]] - } - # If the ordering value is factor, order based on alphabetical order. - # Order numeric values in incerasing order. - if(is.factor(data)){ - o <- order(data, decreasing = !decreasing) - } else { - o <- order(data, decreasing = decreasing) - } - # Reset lvls and reorder the data based on - lvl <- lvl[o] - abund_data$X <- factor(abund_data$X, lvl) - abund_data <- abund_data[order(abund_data$colour_by, abund_data$X),] - # If the data includes also sample metadata to be plotted, reorder - # it also. - if(!is.null(features_data)){ - o <- order(factor(rownames(features_data), lvl)) - features_data <- features_data[o, , drop = FALSE] - } +# This function modifies factor of columns/samples to follow the user-specified +# order. +#' @importFrom dplyr filter arrange desc distinct pull +.order_abundance_cols <- function( + df, order.col.by = order_sample_by, order_sample_by = NULL, + col.levels = NULL, decreasing = TRUE, ...){ + # The ordering factor must be found from colData or be one of the rows + is_coldata <- .is_a_string(order.col.by) && order.col.by %in% colnames(df) + is_feat <- .is_a_string(order.col.by) && order.col.by %in% df$colour_by + if( !(is.null(order.col.by) || is_coldata || is_feat) ){ + stop("'order.col.by' must be a variable from colData(x) or a name ", + "of a row.", call. = FALSE) } - res <- list(abund_data = abund_data, features_data = features_data) - return(res) + if( !(is.null(col.levels) || is.character(col.levels)) ){ + stop("col.levels' must include all columns.", call. = FALSE) + } + if( !.is_a_bool(decreasing) ){ + stop("'decreasing' must be TRUE or FALSE.", call. = FALSE) + } + # + # If user specified levels to use, we get those levels and combine them with + # rownames so that user do not have to specify all names + if( !is.null(col.levels) ){ + col.levels <- union(col.levels, as.character(df$X)) + } + # If column from colData was specified, give the order of samples based on + # this variable + if( is.null(col.levels) && is_coldata ){ + col.levels <- df |> + arrange(if (decreasing) .data[[order.col.by]] else + desc(.data[[order.col.by]]) ) |> + distinct(X) |> + pull(X) + } + # Filter for the specified feature, arrange the dataframe based on the + # specified column and direction, and then pull unique X values + if( is.null(col.levels) && is_feat ){ + col.levels <- df |> + filter(colour_by == order.col.by) |> + arrange(if (decreasing) desc(Y) else Y) |> + distinct(X) |> + pull(X) + } + # If column ordering was not specified, order alphabetically + if( is.null(col.levels) ){ + col.levels <- sort(unique(as.character(df$X))) + } + # Apply the ordering + df$X <- factor(df$X, levels = col.levels) + return(df) } - ################################################################################ -# abundance plotter - +# Abundance plotters +# THis function creates the main abundance plot #' @importFrom ggplot2 ggplot theme_classic geom_point geom_bar coord_flip #' scale_y_continuous -.abund_plotter <- function(object, +.abund_plotter <- function( + object, xlab = "Samples", ylab = paste0(ifelse(as.relative, "Rel. ", ""),"Abundance"), colour_by = NULL, @@ -493,110 +494,141 @@ setMethod("plotAbundance", signature = c("SummarizedExperiment"), use_relative = FALSE, ... ){ - # start plotting - plot_out <- ggplot(object, aes(x=.data[["X"]], y=.data[["Y"]])) + + # Start plotting. From barplot, we exclude 0 values. As we use borders by + # default, 0 values get also borders which looks like they have also + # abundance. + plot_out <- ggplot(object, aes( + x = X, + y = ifelse(Y == 0 & layout == "bar", NA, Y) + )) + xlab(xlab) + ylab(ylab) - # either bar or point plot - if(layout == "bar"){ - abund_out <- .get_bar_args(colour_by, - alpha = bar_alpha, - add_border = add_border, - n = length(unique(object$X))) + # Either bar or point plot + if( layout == "bar" ){ + # Get arguments for bar plot + abund_out <- .get_bar_args( + colour_by, alpha = bar_alpha, add_border = add_border, + n = length(unique(object$X))) + # Create a bar plot plot_out <- plot_out + - do.call(geom_bar, c(abund_out$args, list(stat="identity"))) + + do.call(geom_bar, c(abund_out$args, list(stat = "identity"))) + scale_y_continuous(expand = c(0,0)) - } else { - abund_out <- .get_point_args(colour_by, - shape_by = NULL, - size_by = NULL, - alpha = point_alpha, - size = point_size) + } else if( layout == "point" ){ + # Get arguments for point plot + abund_out <- .get_point_args( + colour_by, shape_by = NULL, size_by = NULL, alpha = point_alpha, + size = point_size) abund_out$border <- TRUE + # Create a point plot plot_out <- plot_out + do.call(geom_point, abund_out$args) } - # adjust point colours - if(!is.null(colour_by)){ - if(abund_out$border){ - # resolve the colour for the line colours - plot_out <- .resolve_plot_colours(plot_out, - object$colour_by, - colour_by, - fill = FALSE) + # Adjust point colours + if( !is.null(colour_by) ){ + if( abund_out$border ){ + # Resolve the colour for the line colours + plot_out <- .resolve_plot_colours( + plot_out, object$colour_by, colour_by, fill = FALSE) } - # resolve the color for fill - plot_out <- .resolve_plot_colours(plot_out, - object$colour_by, - colour_by, - fill = TRUE) + # Resolve the color for fill + plot_out <- .resolve_plot_colours( + plot_out, object$colour_by, colour_by, fill = TRUE) } # Adjust theme plot_out <- plot_out + theme_classic() - # Remove legend if speicified + # Remove legend if specified plot_out <- .add_legend(plot_out, add_legend) # Flip the plot if specified plot_out <- .flip_plot(plot_out, flipped, add_x_text) return(plot_out) } -#' @importFrom ggplot2 ggplot aes labs geom_point geom_raster -.feature_plotter <- function( - feature_data, - name, - xlab = "Samples", - flipped, - add_legend, - add_x_text, - point_alpha, - point_size){ - # If the values are factors, use coloring to plot them. This step is to - # ensure that this functions works both with factors and numeric values. - if(is.factor(feature_data$Y)){ - feature_data$colour_by <- feature_data$Y - feature_data$Y <- "" - colour_by <- unique(feature_data$feature_name) +# This function takes care of incorporating sample metadata to plot. It is added +# either as facets or unique plot. Moreover, the function has also functinality +# to split rows to unique facets. +#' @importFrom dplyr select all_of distinct arrange select +#' @importFrom stats formula +.abund_plotter_incorporate_metadata <- function( + plot_out, df, col.var = features, features = NULL, + facet.cols = FALSE, facet.rows = one.facet, + one.facet = one_facet, one_facet = FALSE, ncol = 2, scales = "fixed", + ...){ + # + if( !(is.null(col.var) || (is.character(col.var) && + all(col.var %in% colnames(df)))) ){ + stop("'col.var' must specify columns from colData(x).", call. = FALSE) } - # Start plotting - feature_plot_out <- ggplot( - feature_data, aes(x=.data[["X"]], y=.data[["Y"]])) + - labs(x = xlab, y = name) - # If there is only one value, i.e., the variable to be plotted was factor - if(length(unique(feature_data$Y)) == 1L){ - # Create a bar layout - feature_out <- .get_bar_args( - colour_by, alpha = point_alpha, add_border = FALSE) - feature_plot_out <- feature_plot_out + - do.call(geom_raster, feature_out$args) + - scale_y_discrete(expand = c(0,0)) - # Adjust the colour scale and legend title - feature_plot_out <- .resolve_plot_colours( - feature_plot_out, feature_data$colour_by, colour_by, fill = TRUE) - legend_pos <- "bottom" - } else { - # If the values are numeric, create a point layout - feature_out <- .get_point_args( - NULL, shape_by = NULL, size_by = NULL, alpha = point_alpha, - size = point_size) - feature_plot_out <- feature_plot_out + - do.call(geom_point, feature_out$args) - legend_pos <- "right" + if(!.is_a_bool(facet.cols)){ + stop("'facet.cols' must be TRUE or FALSE.", call. = FALSE) } - # Adjust theme - feature_plot_out <- feature_plot_out + - theme_classic() - # Remove legend if specified, adjust the position - feature_plot_out <- .add_legend(feature_plot_out, add_legend, legend_pos) - # Flip the plot if specified - feature_plot_out <- .flip_plot(feature_plot_out, flipped, add_x_text) - return(feature_plot_out) + if(!.is_a_bool(facet.rows)){ + stop("'facet.rows' must be TRUE or FALSE.", call. = FALSE) + } + if( sum(c(facet.rows, facet.cols)) == 2L ){ + stop("'Both 'facet.rows' and 'facet.cols' cannot be TRUE.", + call. = FALSE) + } + if( !(.is_an_integer(ncol) && ncol >= 1) ){ + stop("'ncol' must be an integer value greater or equal to 1.", + call. = FALSE) + } + if( !(.is_a_string(scales) && scales %in% + c("fixed", "free", "free_x", "free_y")) ){ + stop("'scales' must be 'fixed', 'free', 'free_x' or 'free_y.", + call. = FALSE) + } + # + # facet.rows is disabled if sample metadata is plotted + facet.rows <- if(!is.null(col.var)) FALSE else facet.rows + # Whether to split the main plot to multiple facets by rows, i.e., each + # taxa has unique facet. + if( facet.rows ){ + plot_out <- plot_out + + facet_wrap(~colour_by, ncol = ncol, scales = scales) + } + # Whether to facet the plot so that columns are facetted based on sample + # metadata. This is for single metadata variable. + if( length(col.var) == 1L && facet.cols ){ + plot_out <- plot_out + facet_wrap( + formula(paste0("~ `", paste0(col.var, collapse = "` + `"),"`")), + ncol = ncol, + scales = scales) + } + # This is also for facetting based on sample metadata, however, this allows + # user to facet columns based on multiple sample metadata variables, e.g. + # time point and sample type. + if( length(col.var) > 1L && facet.cols ){ + .require_package("ggh4x") + plot_out <- plot_out + ggh4x::facet_nested( + formula(paste0("~ `", paste0(col.var, collapse = "` + `"), "`")), + scales = scales) + } + # If user do not want to create facets from the sample metadata, create + # unique plots from metadata variables. + if( !is.null(col.var) && !facet.cols ){ + # Select only sample metadata. Get it in same order that the samples + # are. After this we have only col.var columns, and metadata includes + # as many rows as there are samples. + metadata <- df |> + select(X, all_of(col.var)) |> + distinct() |> + arrange(X) |> + select(-X) + # Create a plot from metadata variables + plot_feature_out <- .features_plotter(metadata, col.var, ...) + # Add the metadata plots to a list with main plot + plot_out <- c(list(abundance = plot_out), plot_feature_out) + names(plot_out) <- c("abundance", col.var) + } + return(plot_out) } +# This function takes care that each sample metadata variable is plotted as +# unique plot. .features_plotter <- function( features_data, - order_sample_by, xlab = NULL, flipped = FALSE, add_legend = add.legend, @@ -612,12 +644,11 @@ setMethod("plotAbundance", signature = c("SummarizedExperiment"), names <- colnames(features_data) # For each variable, create a data.frame that contains sample names, # variable name and values of variable - features_data <- lapply( - names, function(col){ - data.frame( - X = factor(rownames(features_data), rownames(features_data)), - feature_name = col, - Y = features_data[[col]]) + features_data <- lapply(names, function(col){ + data.frame( + X = factor(rownames(features_data), rownames(features_data)), + feature_name = col, + Y = features_data[[col]]) }) names(features_data) <- names # Loop through variables and create plot for each variable @@ -634,16 +665,58 @@ setMethod("plotAbundance", signature = c("SummarizedExperiment"), point_size = point_size), SIMPLIFY = FALSE) names(plots_out) <- names(features_data) - # If the varoable for order the data was specified, return only the feature - # plot with that variable along with the main plot. This means that all - # other feature plots are discarded. - if(!is.null(order_sample_by)){ - reorder <- c( - order_sample_by, - names(plots_out)[!(names(plots_out) %in% order_sample_by)]) - m <- match(reorder,names(plots_out)) - m <- m[!is.na(m)] - plots_out <- plots_out[m] - } return(plots_out) } + +# This function creates a plot from single sample metadata variable. +#' @importFrom ggplot2 ggplot aes labs geom_point geom_raster +.feature_plotter <- function( + feature_data, + name, + xlab = "Samples", + flipped, + add_legend, + add_x_text, + point_alpha, + point_size){ + # If the values are factors, use coloring to plot them. This step is to + # ensure that this functions works both with factors and numeric values. + if( is.factor(feature_data$Y) ){ + feature_data$colour_by <- feature_data$Y + feature_data$Y <- "" + colour_by <- unique(feature_data$feature_name) + } + # Start plotting + plot_out <- ggplot( + feature_data, aes(x = X, y = Y)) + + labs(x = xlab, y = name) + # If there is only one value, i.e., the variable to be plotted was factor + if( length(unique(feature_data$Y)) == 1L ){ + # Create a bar layout + feature_out <- .get_bar_args( + colour_by, alpha = point_alpha, add_border = FALSE) + plot_out <- plot_out + + do.call(geom_raster, feature_out$args) + + scale_y_discrete(expand = c(0,0)) + # Adjust the colour scale and legend title + plot_out <- .resolve_plot_colours( + plot_out, feature_data$colour_by, colour_by, fill = TRUE) + legend_pos <- "bottom" + } else { + # If the values are numeric, create a point layout + feature_out <- .get_point_args( + NULL, shape_by = NULL, size_by = NULL, alpha = point_alpha, + size = point_size) + plot_out <- plot_out + + do.call(geom_point, feature_out$args) + legend_pos <- "right" + } + # Adjust theme + plot_out <- plot_out + + theme_classic() + # Remove legend if specified, adjust the position + plot_out <- .add_legend(plot_out, add_legend, legend_pos) + # Flip the plot if specified + plot_out <- .flip_plot(plot_out, flipped, add_x_text) + return(plot_out) +} diff --git a/man/plotAbundance.Rd b/man/plotAbundance.Rd index 1ad5738..6d4d15b 100644 --- a/man/plotAbundance.Rd +++ b/man/plotAbundance.Rd @@ -9,20 +9,9 @@ plotAbundance(x, ...) \S4method{plotAbundance}{SummarizedExperiment}( x, - col.var = features, - features = NULL, - order.row.by = order_rank_by, - order_rank_by = c("name", "abund", "revabund"), - order.col.by = order_sample_by, - order_sample_by = NULL, - decreasing = TRUE, - layout = c("bar", "point"), - one.facet = one_facet, - one_facet = TRUE, - ncol = 2, - scales = "fixed", assay.type = assay_name, assay_name = "counts", + layout = "bar", ... ) } @@ -33,65 +22,65 @@ object.} \item{...}{additional parameters for plotting. \itemize{ -\item \code{group} \code{Character scalar}. Specifies the group for +\item \code{group}: \code{Character scalar}. Specifies the group for agglomeration. Must be a value from \code{colnames(rowData(x))}. If \code{NULL}, agglomeration is not applied. (Default: \code{NULL}) -\item \code{as.relative} \code{Character scalar}. Should the relative +\item \code{as.relative}: \code{Character scalar}. Should the relative values be calculated? (Default: \code{FALSE}) -} -See \code{\link{mia-plot-args}} for more details i.e. call -\code{help("mia-plot-args")}} -\item{col.var}{\code{Character scalar}. Selects a column from +\item \code{col.var}: \code{Character scalar}. Selects a column from \code{colData} to be plotted below the abundance plot. Continuous numeric values will be plotted as point, whereas factors and -character will be plotted as colour-code bar. (Default: \code{NULL})} - -\item{features}{Deprecated. Use \code{col.var} instead.} +character will be plotted as colour-code bar. (Default: \code{NULL}) -\item{order.row.by}{\code{Character scalar}. How to order abundance value: -By name (\code{"name"}) -for sorting the taxonomic labels alphabetically, by abundance -(\code{"abund"}) to sort by abundance values or by a reverse order of -abundance values (\code{"revabund"}). (Default: \code{"name"})} +\item \code{order.row.by}: \code{Character scalar}. How to order abundance +value. By name (\code{"name"}) for sorting the taxonomic labels +alphabetically, by abundance (\code{"abund"}) to sort by abundance +values or by a reverse order of +abundance values (\code{"revabund"}). (Default: \code{"name"}) -\item{order_rank_by}{Deprecated. Use \code{order.row.by} instead.} +\item \code{row.levels}: \code{Character vector}. Specifies order of rows +in a plot. Can be combined with \code{order.row.by} to control order +of only certain rows. If \code{NULL}, the order follows +\code{order.row.by}. (Default: \code{NULL}) -\item{order.col.by}{\code{Character scalar}. from the chosen rank of -abundance data or from \code{colData} to select values to order the abundance -plot by. (Default: \code{NULL})} +\item \code{order.col.by}: \code{Character scalar}. from the chosen rank of +abundance data or from \code{colData} to select values to order the +abundance plot by. (Default: \code{NULL}) -\item{order_sample_by}{Deprecated. Use \code{order.col.by} instead.} +\item \code{col.levels}: \code{Character vector}. Specifies order of +columns in a plot. Can be combined with \code{order.col.by} to control +order of only certain columns. If \code{NULL}, the order follows +\code{order.col.by}. (Default: \code{NULL}) -\item{decreasing}{\code{Logical scalar}. If the \code{order.col.by} -is defined and the -values are numeric, should the values used to order in decreasing or -increasing fashion? (Default: \code{FALSE})} +\item \code{decreasing}: \code{Logical scalar}. If the \code{order.col.by} +is defined and the values are numeric, should the values used to order in +decreasing or increasing fashion? (Default: \code{FALSE}) -\item{layout}{\code{Character scalar}. Either \dQuote{bar} or \dQuote{point}.} - -\item{one.facet}{\code{Logical scalar}. Should the plot be returned in on -facet or split into -different facet, one facet per different value detect in \code{group}. If -\code{col.var} or \code{order.col.by} is not \code{NULL}, this setting will -be disregarded. (Default: \code{TRUE})} +\item \code{facet.rows}: \code{Logical scalar}. Should the rows in the +plot be spitted into facets? (Default: \code{FALSE}) -\item{one_facet}{Deprecated. Use \code{one.facet} instead.} +\item \code{facet.cols}: \code{Logical scalar}. Should the columns in the +plot be spitted into facets? (Default: \code{FALSE}) -\item{ncol}{\code{Numeric scalar}. if \code{one.facet = FALSE}, -\code{ncol} defines many -columns should be for plotting the different facets. (Default: \code{2})} +\item \code{ncol}: \code{Numeric scalar}. if facets are applied, +\code{ncol} defines many columns should be for plotting the different +facets. (Default: \code{2}) -\item{scales}{\code{Character scalar}. Defines the behavior of the scales -of each facet. Both values are -passed onto \code{\link[ggplot2:facet_wrap]{facet_wrap}}. -(Default: \code{"fixed"})} +\item \code{scales} \code{Character scalar}. Defines the behavior of the +scales of each facet. The value is passed into +\code{\link[ggplot2:facet_wrap]{facet_wrap}}. (Default: \code{"fixed"}) +} +See \code{\link{mia-plot-args}} for more details i.e. call +\code{help("mia-plot-args")}} \item{assay.type}{\code{Character scalar} value defining which assay data to use. (Default: \code{"relabundance"})} \item{assay_name}{Deprecate. Use \code{assay.type} instead.} + +\item{layout}{\code{Character scalar}. Either \dQuote{bar} or \dQuote{point}.} } \value{ a \code{\link[ggplot2:ggplot]{ggplot}} object or list of two @@ -137,7 +126,7 @@ plotAbundance( # Apply relative transform tse <- transformAssay(tse, method = "relabundance") - + # A feature from colData or taxon from chosen rank can be used for ordering # samples. plotAbundance( @@ -146,7 +135,7 @@ plotAbundance( # col.var from colData can be plotted together with abundance plot. # Returned object is a list that includes two plot; other visualizes -## abundance other col.var. +## abundance other col.var. plot <- plotAbundance( tse, assay.type = "relabundance", group = "Phylum", col.var = "SampleType") @@ -154,19 +143,17 @@ plot <- plotAbundance( # These two plots can be combined with wrap_plots function from patchwork # package library(patchwork) -wrap_plots(plot, ncol = 1) +wrap_plots(plot, ncol = 1, heights = c(0.95, 0.05)) } # Same plot as above but showing sample IDs as labels for the x axis on the -# top plot -plot[[1]] <- plotAbundance( +# top plot. Moreover, we use facets. +plot <- plotAbundance( tse, assay.type = "relabundance", group = "Phylum", col.var = "SampleType", add.legend = FALSE, - add.x.text = TRUE)[[1]] + - theme(axis.text.x = element_text(angle = 90)) -\donttest{ -wrap_plots(plot, ncol = 1, heights = c(0.8,0.2)) -} + add.x.text = TRUE, facet.cols = TRUE, scales = "free_x") + + theme(axis.text.x = element_text(angle = 90)) +plot # Compositional barplot with top 5 taxa and samples sorted by # "Bacteroidetes" diff --git a/tests/testthat/test-2plotAbundance.R b/tests/testthat/test-2plotAbundance.R index 3c77407..c84a194 100644 --- a/tests/testthat/test-2plotAbundance.R +++ b/tests/testthat/test-2plotAbundance.R @@ -5,80 +5,104 @@ test_that("plot abundance", { data(GlobalPatterns) x <- GlobalPatterns # .check_tree_plot_switches - expect_error(miaViz:::.get_abundance_data(group = "Phylum"), - 'argument "x" is missing') - expect_error(miaViz:::.get_abundance_data(x, group = "Phylum"), - 'argument "assay.type" is missing') + expect_error( + miaViz:::.get_abundance_data(group = "Phylum"), + 'argument "x" is missing') + expect_error( + miaViz:::.get_abundance_data(x, group = "Phylum"), + 'argument "assay.type" is missing') expect_error(miaViz:::.get_abundance_data(x)) - actual <- miaViz:::.get_abundance_data(x, group = "Phylum", assay.type = "counts") + actual <- miaViz:::.get_abundance_data( + x, group = "Phylum", assay.type = "counts") expect_s3_class(actual,"tbl_df") - expect_named(actual,c("colour_by","X","Y")) + expect_true( all(c("colour_by","X","Y") %in% colnames(actual)) ) expect_true(is.factor(actual$colour_by)) expect_true(is.factor(actual$X)) expect_true(is.numeric(actual$Y)) - expect_error(miaViz:::.get_abundance_data( - x, group = "Phylum", assay.type = "counts", order_rank_by = "meep")) - actual2 <- miaViz:::.get_abundance_data( - x, group = "Phylum", assay.type = "counts", order_rank_by = "abund") + expect_error(miaViz:::.order_abundance_rows(actual, order.row.by = "meep")) + actual2 <- miaViz:::.order_abundance_rows(actual, order.row.by = "abund") expect_equal(as.character(actual[1,1,drop=TRUE]),"ABY1_OD1") - expect_equal(as.character(actual2[1,1,drop=TRUE]),"Proteobacteria") + expect_equal(levels(actual2[["colour_by"]])[[1]],"Proteobacteria") actual3 <- miaViz:::.get_abundance_data( - x, group = "Phylum", assay.type = "counts", order.row.by = "abund", + x, group = "Phylum", assay.type = "counts", + order.row.by = "abund", as.relative = FALSE) - expect_true(max(actual3$Y) > 1) - # .norm_order_sample_by - expect_true(is.null(miaViz:::.norm_order_sample_by(NULL))) - expect_error(miaViz:::.norm_order_sample_by("meep"), - 'argument "factors" is missing') - expect_error(miaViz:::.norm_order_sample_by("meep","meep2",x), - "'order.col.by' must be a single non-empty character value") - expect_equal(miaViz:::.norm_order_sample_by("Primer","meep2",x),"Primer") - # .get_feature_data - expect_true(is.null(miaViz:::.get_feature_data())) - expect_true(is.null(miaViz:::.get_feature_data(x))) - expect_true(is.null(miaViz:::.get_feature_data(x,"meep"))) - actual <- miaViz:::.get_feature_data(x,"Primer") - expect_true(is.list(actual)) - expect_named(actual,c("name","value")) - # .get_features_data - expect_error(miaViz:::.get_features_data(), - 'argument "order_sample_by" is missing') - expect_error(miaViz:::.get_features_data("Primer"), - 'argument "order_sample_by" is missing') - actual <- miaViz:::.get_features_data("Primer","SampleType",x) - expect_true(is.data.frame(actual)) - expect_named(actual,c("SampleType","Primer")) - # .order_abund_feature_data - abund_data <- miaViz:::.get_abundance_data( - x, group = "Phylum", assay.type = "counts") - features_data <- miaViz:::.get_features_data("Primer","SampleType",x) - expect_error(miaViz:::.order_abund_feature_data(abund_data,features_data), - 'argument "order_sample_by" is missing') - actual <- miaViz:::.order_abund_feature_data(abund_data,features_data, - "Primer") - expect_true(is.list(actual)) - expect_named(actual,c("abund_data","features_data")) - expect_equal(abund_data,actual$abund_data) - expect_equal(features_data,actual$features_data) - actual <- miaViz:::.order_abund_feature_data(abund_data,features_data, - "Primer",decreasing = FALSE) - expect_error(expect_equal(abund_data,actual$abund_data)) - expect_error(expect_equal(features_data,actual$features_data)) + expect_true(max(actual3$Y, na.rm = TRUE) > 1) + # .order_abundance_cols + expect_error(miaViz:::.order_abundance_cols("meep")) + expect_error(miaViz:::.order_abundance_cols("meep","meep2",x)) # expect_error(plot <- plotAbundance(x, assay.type="counts")) plot <- plotAbundance(x[1:20, ], assay.type="counts") expect_s3_class(plot,"ggplot") - expect_named(plot$data,c("colour_by","X","Y")) - plot <- plotAbundance(x, assay.type="counts", group = "Phylum", - col.var = "SampleType", - order.col.by = "SampleType") + expect_true(all(c("colour_by","X","Y") %in% colnames(plot$data))) + plot <- plotAbundance( + x, assay.type = "counts", group = "Phylum", col.var = "SampleType", + order.col.by = "SampleType") expect_true(is.list(plot)) expect_s3_class(plot[[1]],"ggplot") + # Check that the grouping is correct + expect_true(all(levels(plot$data$colour_by) %in% unique(rowData(x)$Phylum))) # rowData(x)$Salame <- sample(letters[1:5], nrow(x), replace=TRUE) - plot <- plotAbundance(x, assay.type="counts", group = "Salame", - col.var = "SampleType", - order.col.by = "SampleType") + plot <- plotAbundance( + x, assay.type="counts", group = "Salame", col.var = "SampleType", + order.col.by = "SampleType") expect_true(is.list(plot)) expect_s3_class(plot[[1]],"ggplot") + # Expect error since too many rows + expect_error(plotAbundance(x, assay.type="counts")) + # Mock data for paired sample testing + df <- data.frame( + sample = c("S1", "S2", "S3"), + time_point = c(1, 1, 2), + value = c(10, 15, 20) + ) + # Test paired sample logic with correct input + paired_df <- miaViz:::.add_paired_samples(df, paired = TRUE, order.col.by = "sample") + expect_equal(paired_df, df) # Should still have 3 rows (no missing) + # Test with invalid `paired` input + expect_error(miaViz:::.add_paired_samples(df, paired = "INVALID")) + # Test edge case with empty data + empty_df <- data.frame() + expect_equal(nrow(miaViz:::.add_paired_samples(empty_df)), 0) + # + # Test with correct input + actual <- miaViz:::.get_abundance_data( + x, group = "Phylum", assay.type = "counts") + expect_s3_class(actual, "tbl_df") + expect_true(all(c("colour_by", "X", "Y") %in% colnames(actual))) + # Test error when missing necessary arguments + expect_error(miaViz:::.get_abundance_data(x[1:10, ], group = "Phylum"), + "argument \"assay.type\" is missing, with no default") + expect_error(miaViz:::.get_abundance_data(x[1:10, ]), + "argument \"assay.type\" is missing, with no default") + # Edge case with empty dataset + empty_x <- x[NULL, ] # Empty dataset + expect_error(miaViz:::.get_abundance_data( + empty_x, group = "Phylum", assay.type = "counts")) + # Test with correct input + df <- .get_abundance_data(x[1:20, ], "counts") + actual <- miaViz:::.order_abundance_cols( + df, order.col.by = "SampleType") + expect_s3_class(actual, "tbl_df") + # Test error for incorrect ordering variable + expect_error(miaViz:::.order_abundance_cols( + df, order.col.by = "InvalidColumn")) + # Test edge case with empty dataset + empty_df <- df[NULL, ] # Empty dataset + expect_equal(nrow(miaViz:::.order_abundance_cols( + empty_df, order.col.by = "SampleType")), 0) + # Edge case: Empty dataset + x_empty <- x[NULL, ] + expect_error(plotAbundance(x_empty, assay.type = "counts")) + # Edge case: Dataset missing the assay.type column + x_missing <- x + assays(x_missing)$counts <- NULL # Remove assay.type column + expect_error(plotAbundance(x_missing[1:10, ], assay.type = "counts")) + # Test with numeric data as grouping factor + x_numeric_group <- x[1:20, ] + rowData(x_numeric_group)$Group <- 1:20 # Numeric group instead of factor + expect_error(plot <- plotAbundance( + x_numeric_group, assay.type="counts", group = "Group")) })