diff --git a/.Rproj.user/E4C83D23/console06/INDEX001 b/.Rproj.user/E4C83D23/console06/INDEX001 index daaab50..dd37ffc 100644 --- a/.Rproj.user/E4C83D23/console06/INDEX001 +++ b/.Rproj.user/E4C83D23/console06/INDEX001 @@ -1 +1 @@ -[{"allow_restart":true,"alt_buffer":false,"autoclose":1,"buffered_output":"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n","caption":"Terminal 1","channel_id":"4185","channel_mode":1,"child_procs":false,"cols":87,"cwd":"~/Repositary/visual16S","exit_code":1,"handle":"57531550","interaction_mode":2,"max_output_lines":1000,"restarted":true,"rows":60,"shell_type":7,"show_on_output":false,"terminal_sequence":1,"title":"/home/yeguanhua/Repositary/visual16S","track_env":true,"zombie":false}] \ No newline at end of file +[{"allow_restart":true,"alt_buffer":false,"autoclose":1,"buffered_output":"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n","caption":"Terminal 1","channel_id":"4977","channel_mode":1,"child_procs":false,"cols":87,"cwd":"~/Repositary/visual16S","exit_code":null,"handle":"57531550","interaction_mode":2,"max_output_lines":1000,"restarted":true,"rows":60,"shell_type":7,"show_on_output":false,"terminal_sequence":1,"title":"yeguanhua@XBIOME-SERVER-CPU: ~/Repositary/visual16S","track_env":true,"zombie":false}] \ No newline at end of file diff --git a/.Rproj.user/E4C83D23/pcs/files-pane.pper b/.Rproj.user/E4C83D23/pcs/files-pane.pper index ff8e7d9..55ef08c 100644 --- a/.Rproj.user/E4C83D23/pcs/files-pane.pper +++ b/.Rproj.user/E4C83D23/pcs/files-pane.pper @@ -1,5 +1,5 @@ { - "path" : "~/Repositary/visual16S", + "path" : "~/xviz", "sortOrder" : [ { "ascending" : true, diff --git a/.Rproj.user/E4C83D23/pcs/source-pane.pper b/.Rproj.user/E4C83D23/pcs/source-pane.pper index 9efb51b..70829f6 100644 --- a/.Rproj.user/E4C83D23/pcs/source-pane.pper +++ b/.Rproj.user/E4C83D23/pcs/source-pane.pper @@ -1,3 +1,3 @@ { - "activeTab" : -1 + "activeTab" : 1 } \ No newline at end of file diff --git a/.Rproj.user/E4C83D23/pcs/windowlayoutstate.pper b/.Rproj.user/E4C83D23/pcs/windowlayoutstate.pper index e273a22..64b861b 100644 --- a/.Rproj.user/E4C83D23/pcs/windowlayoutstate.pper +++ b/.Rproj.user/E4C83D23/pcs/windowlayoutstate.pper @@ -1,13 +1,13 @@ { "left" : { "panelheight" : 1325, - "splitterpos" : 546, - "topwindowstate" : "HIDE", + "splitterpos" : 543, + "topwindowstate" : "NORMAL", "windowheight" : 1379 }, "right" : { "panelheight" : 1325, - "splitterpos" : 543, + "splitterpos" : 542, "topwindowstate" : "NORMAL", "windowheight" : 1379 } diff --git a/.Rproj.user/E4C83D23/pcs/workbench-pane.pper b/.Rproj.user/E4C83D23/pcs/workbench-pane.pper index b15bf23..0e24b84 100644 --- a/.Rproj.user/E4C83D23/pcs/workbench-pane.pper +++ b/.Rproj.user/E4C83D23/pcs/workbench-pane.pper @@ -1,5 +1,5 @@ { - "TabSet1" : 3, + "TabSet1" : 0, "TabSet2" : 0, "TabZoom" : { } diff --git a/.Rproj.user/E4C83D23/persistent-state b/.Rproj.user/E4C83D23/persistent-state index d2e6b78..16ee712 100644 --- a/.Rproj.user/E4C83D23/persistent-state +++ b/.Rproj.user/E4C83D23/persistent-state @@ -1,8 +1,8 @@ build-last-errors="[]" -build-last-errors-base-dir="~/Repositary/visual16S/" -build-last-outputs="[{\"output\":\"==> R CMD INSTALL --no-multiarch --with-keep.source visual16S\\n\\n\",\"type\":0},{\"output\":\"No protocol specified\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"* installing to library ‘/home/yeguanhua/R/x86_64-pc-linux-gnu-library/3.5’\\n\",\"type\":1},{\"output\":\"* installing *source* package ‘visual16S’ ...\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** R\\n\",\"type\":1},{\"output\":\"** data\\n\",\"type\":1},{\"output\":\"*** moving datasets to lazyload DB\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** byte-compile and prepare package for lazy loading\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** help\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"*** installing help indices\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** building package indices\\n\",\"type\":1},{\"output\":\"** testing if installed package can be loaded\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"No protocol specified\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"* DONE (visual16S)\\n\",\"type\":1},{\"output\":\"\",\"type\":1}]" +build-last-errors-base-dir="" +build-last-outputs="[]" compile_pdf_state="{\"errors\":[],\"output\":\"\",\"running\":false,\"tab_visible\":false,\"target_file\":\"\"}" -files.monitored-path="" +files.monitored-path="~/BJCancer/analysis" find-in-files-state="{\"handle\":\"\",\"input\":\"\",\"path\":\"\",\"regex\":true,\"results\":{\"file\":[],\"line\":[],\"lineValue\":[],\"matchOff\":[],\"matchOn\":[]},\"running\":false}" imageDirtyState="1" saveActionState="0" diff --git a/.Rproj.user/E4C83D23/sources/prop/INDEX b/.Rproj.user/E4C83D23/sources/prop/INDEX index 1cf0e4b..3373793 100644 --- a/.Rproj.user/E4C83D23/sources/prop/INDEX +++ b/.Rproj.user/E4C83D23/sources/prop/INDEX @@ -1,14 +1,28 @@ +~%2FBJCancer%2FMJ%2FMJ20190403%2Fanalysis%2FMJ_190403_analysis.Rmd="2E7B383F" ~%2FBJCancer%2FMJ%2Fanalysis%2Fanalysis_mj2.Rmd="3715EBC3" ~%2FBJCancer%2FMJ20190403011%2Fanalysis%2FMJ_190403_analysis.Rmd="5616A48D" +~%2FBJCancer%2Fanalysis%2Fanalysis.R="312F76A" +~%2FBJCancer%2Fanalysis%2Fsubject_extract.R="46FD7822" +~%2FBJCancer%2Fmetadata%2Fprocessing.R="298A5E2" ~%2FR%2F16S-analysis-visualization%2FDESCRIPTION="8FFE7823" ~%2FR%2F16S-analysis-visualization%2FREADME.Rmd="5DA43DB2" ~%2FR%2FXbiome16SVisualization%2FDESCRIPTION="EAFED69B" +~%2FRepositary%2F16S_analysis_functions_Yip.R="C94066A8" ~%2FRepositary%2Fvisual16S%2FDESCRIPTION="718DAA5C" ~%2FRepositary%2Fvisual16S%2FR%2Falpha_diversity_plot.R="AD67D71C" +~%2FRepositary%2Fvisual16S%2FR%2Fconstruct_lefse_table.R="E2E70350" +~%2FRepositary%2Fvisual16S%2FR%2Fconstruct_otu_table.R="9F3D6689" +~%2FRepositary%2Fvisual16S%2FR%2Flog2fc.R="80BEE29D" +~%2FRepositary%2Fvisual16S%2FR%2Fplot_alpha_diversity.R="C34039C6" +~%2FRepositary%2Fvisual16S%2FR%2Fplot_beta_diversity.R="2E6CF992" +~%2FRepositary%2Fvisual16S%2FR%2Fplot_correlation.R="C6A8435F" +~%2FRepositary%2Fvisual16S%2FR%2Fplot_sparsity.R="B8AEF7F3" ~%2FRepositary%2Fvisual16S%2FR%2Fplot_stacked_bar.R="9D821D65" +~%2FRepositary%2Fvisual16S%2FR%2Ftrack_reads_dada2.R="D1F8C44E" ~%2FRepositary%2Fvisual16S%2FREADME.Rmd="B4C7C8F" ~%2FRepositary%2Fvisual16S%2FREADME.md="CDBA173" ~%2FRepositary%2Fvisualization416S%2FREADME.Rmd="A92803F3" +~%2Fxviz%2FR%2Fconstruct_otu_table.R="844B86E3" ~%2Fxviz%2FR%2Fplot_alpha_diversity.R="27BC8921" ~%2Fxviz%2FR%2Fplot_beta_diversity.R="E3BEF9E6" ~%2Fxviz%2FR%2Fplot_stacked_bar.R="BDEA371A" diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths index 9c1a82f..cb77031 100644 --- a/.Rproj.user/shared/notebooks/paths +++ b/.Rproj.user/shared/notebooks/paths @@ -1,7 +1,21 @@ +/home/yeguanhua/BJCancer/MJ/MJ20190403/analysis/MJ_190403_analysis.Rmd="960A7DF" +/home/yeguanhua/BJCancer/analysis/analysis.R="C933209" +/home/yeguanhua/BJCancer/analysis/subject_extract.R="C8C14983" +/home/yeguanhua/BJCancer/metadata/processing.R="5E210D39" +/home/yeguanhua/Repositary/16S_analysis_functions_Yip.R="193FF21C" /home/yeguanhua/Repositary/visual16S/DESCRIPTION="D6FAFC4C" +/home/yeguanhua/Repositary/visual16S/R/construct_lefse_table.R="FE82BBBF" +/home/yeguanhua/Repositary/visual16S/R/construct_otu_table.R="1CB7F17F" +/home/yeguanhua/Repositary/visual16S/R/log2fc.R="25C0CF72" +/home/yeguanhua/Repositary/visual16S/R/plot_alpha_diversity.R="80616A8C" +/home/yeguanhua/Repositary/visual16S/R/plot_beta_diversity.R="CEFFBA5" +/home/yeguanhua/Repositary/visual16S/R/plot_correlation.R="8C1009F" +/home/yeguanhua/Repositary/visual16S/R/plot_sparsity.R="4BF1C0A9" /home/yeguanhua/Repositary/visual16S/R/plot_stacked_bar.R="C1BF12DA" +/home/yeguanhua/Repositary/visual16S/R/track_reads_dada2.R="B6F5D662" /home/yeguanhua/Repositary/visual16S/README.Rmd="EFE45572" /home/yeguanhua/Repositary/visual16S/README.md="802CD8B6" +/home/yeguanhua/xviz/R/construct_otu_table.R="7A12CFBD" /home/yeguanhua/xviz/R/plot_alpha_diversity.R="33D2C56B" /home/yeguanhua/xviz/R/plot_beta_diversity.R="41254790" /home/yeguanhua/xviz/R/plot_stacked_bar.R="6591DBD3" diff --git a/R/construct_lefse_table.R b/R/construct_lefse_table.R index 9558e94..c175ef1 100644 --- a/R/construct_lefse_table.R +++ b/R/construct_lefse_table.R @@ -5,15 +5,14 @@ #' Format Data for LEfSe for more details. #' #' @param phyloseq A phyloseq object contain otu table, taxonomy table, sample -#' metadata and phylogenetic tree. +#' metadata and phylogenetic tree. #' @param feature The column name of the feature you want to select. In final -#' table, feature will be the first row. +#' table, feature will be the first row. #' @param level The coloumn name of the level wanted to select. Default is -#' "all". If "all" then retain all taxonomy level, else retain the -#' taxonomy from Kingdom to selected level, drop everything else. -#' Level name should be one of "all", "Kingdom", "Phylum", "Class", -#' "Order", "Family", "Genus", "Species". Taxonomy will be -#' seperated by "|". +#' "all". If "all" then retain all taxonomy level, else retain the taxonomy +#' from Kingdom to selected level, drop everything else. Level name should be +#' one of c("all", "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", +#' "Species"). Taxonomy will be seperated by "|". #' @export #' @examples #' construct_lefse_table(demo_phyloseq_object, feature = "diagnosis", diff --git a/R/construct_otu_table.R b/R/construct_otu_table.R index 0adbc32..38f6733 100644 --- a/R/construct_otu_table.R +++ b/R/construct_otu_table.R @@ -3,12 +3,12 @@ #' construct_otu_table can construct a OTU table with a phyloseq object. #' #' @param phyloseq A phyloseq object contain otu table, taxonomy table, sample -#' metadata and phylogenetic tree. +#' metadata and phylogenetic tree. #' @param level The coloumn name of the level wanted to select. Default is -#' "all". If "all" then retain all taxonomy level and seperate by -#' "; ", else ONLY retain the given taxonomy level, drop -#' everything else. Level name should be one of "all", "Kingdom", -#' "Phylum", "Class", "Order", "Family", "Genus", "Species". +#' "all". If "all" then retain all taxonomy level and seperate by "; ", else +#' ONLY retain the given taxonomy level, drop everything else. Level name +#' should be one of "all", "Kingdom", "Phylum", "Class", "Order", "Family", +#' "Genus", "Species". #' @export #' @examples #' construct_otu_table(demo_phyloseq_object, level = "Genus") %>% .[,1:5] @@ -19,8 +19,8 @@ construct_otu_table <- function(phyloseq, level = "all") { # Check if input 'level' is correct if (!level %in% c("all", "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")) { - stop('level should be one of "all", "Kingdom", "Phylum", "Class", "Order", - "Family", "Genus", "Species".')} + stop(paste0('Argument "level" should be one of c("all", "Kingdom", ', + '"Phylum" "Class", "Order", "Family", "Genus", "Species").'))} # Read in sequence table and taxonomy table from phyloseq otu <- otu_table(phyloseq) %>% as.data.frame() %>% t() %>% as.data.frame() %>% rownames_to_column(var = "OTU_ID") diff --git a/R/convert_to_percentage.R b/R/convert_to_percentage.R index 03ec0a5..6002d24 100644 --- a/R/convert_to_percentage.R +++ b/R/convert_to_percentage.R @@ -4,10 +4,9 @@ #' #' @param df a input data frame. #' @param row_sum Default is TRUE. If row_sum == TRUE, then will take every -#' value of a row and divide by the summary of this row, and -#' apply this to every row. If row_sum == FALSE, then will take -#' every value of a column and divide by the summary of this -#' column, and apply this to every column. +#' value of a row and divide by the summary of this row, and apply this to +#' every row. If row_sum == FALSE, then will take every value of a column and +#' divide by the summary of this column, and apply this to every column. #' @export #' @examples #' convert_to_percentage(demo_dada2_result$seq_tab, row_sum = TRUE) %>% .[,1:5] diff --git a/R/extract_metadata_phyloseq.R b/R/extract_metadata_phyloseq.R index 26403d8..05a9a6c 100644 --- a/R/extract_metadata_phyloseq.R +++ b/R/extract_metadata_phyloseq.R @@ -8,10 +8,10 @@ #' column. #' #' @param phyloseq A phyloseq object contain otu table, taxonomy table, sample -#' metadata and phylogenetic tree. +#' metadata and phylogenetic tree. #' @param feature The column name of the feature you want to select. Default is -#' NA. If NA, will return the complete metadata, else will -#' return subject id and feature column that's given. +#' NA. If NA, will return the complete metadata, else will return subject id +#' and feature column that's given. #' @export #' @examples #' extract_metadata_phyloseq(demo_phyloseq_object) diff --git a/R/log2fc.R b/R/log2fc.R index 7cbe50b..497db05 100644 --- a/R/log2fc.R +++ b/R/log2fc.R @@ -3,16 +3,15 @@ #' This is a function for plotting log2 fold change. #' #' @param phyloseq A phyloseq object contain otu table, taxonomy table, sample -#' metadata and phylogenetic tree. -#' @param feature The column name of the feature you want to select from metadata, -#' e.g. "Phenotype". -#' @param level Which taxonomy level to calculate fold change. Default is NA. If -#' level is given, will use construct_otu_table function to construct -#' OTU table, and use DESeq to calculate fold change. -#' +#' metadata and phylogenetic tree. +#' @param feature The column name of the feature you want to select from +#' metadata, e.g. "Phenotype". +#' @param level Which taxonomy level to calculate fold change. Default is NA. +#' If level is given, will use construct_otu_table function to construct OTU +#' table, and use DESeq to calculate fold change. #' @param p_value The cut off P value for the fold change. Default is 0.05. #' @param save_res Default is FALSE. If TRUE, will save original result -#' DESeq2_result.rds to current working directory. +#' DESeq2_result.rds to current working directory. #' @param reference The control group. Default is NA. #' @param treatment The treatment group. Default is NA. #' @export diff --git a/R/plot_alpha_diversity.R b/R/plot_alpha_diversity.R index d69c448..04e6c48 100644 --- a/R/plot_alpha_diversity.R +++ b/R/plot_alpha_diversity.R @@ -3,19 +3,18 @@ #' This is a function for plotting alpha diversity. #' #' @param phyloseq A phyloseq object contain otu table, taxonomy table, sample -#' metadata and phylogenetic tree. +#' metadata and phylogenetic tree. #' @param feature The column name of the feature you want to select from -#' metadata. +#' metadata. #' @param feature2 The column name of another feature you want to select from -#' metadata, e.g. "Gender", which will make the plots draw in -#' different shapes. Default is NA. +#' metadata, e.g. "Gender", which will make the plots draw in different shapes. +#' Default is NA. #' @param measures The measures to calculate alpha diversity. Default is NA. If -#' NA, all available alpha diversity measures will be -#' calculated and generate a table. If not NA, measures -#' should be one of "Observed", "Chao1", "ACE", "Shannon", -#' "Simpson", "InvSimpson", "Fisher". +#' NA, all available alpha diversity measures will be calculated and generate a +#' table. If not NA, measures should be one of c("Observed", "Chao1", "ACE", +#' "Shannon", "Simpson", "InvSimpson", "Fisher"). #' @param p_test The p-value to test alpha diversity. p_test should be either -#' "wilcox" or "kruskal". +#' "wilcox" or "kruskal". PS: "wilcox" can only work with two groups. #' @export #' @examples #' plot_alpha_diversity(demo_phyloseq_object, feature = "diagnosis", @@ -27,15 +26,15 @@ plot_alpha_diversity <- function (phyloseq, feature, feature2 = NA, if (!is.na(measures)) { if (!measures %in% c("Observed", "Chao1", "ACE", "Shannon", "Simpson", "InvSimpson", "Fisher")) { - stop('measures should be one of c("Observed", "Chao1", "ACE", "Shannon", - "Simpson", "InvSimpson", "Fisher").') + stop(paste0('Argument "measures" should be one of c("Observed", "Chao1"', + ', "ACE", "Shannon", "Simpson", "InvSimpson", "Fisher").')) } else { ## Step 1: Use plot_richness function to calculate alpha diversity alpha_diversity <- plot_richness(phyloseq, x = feature, measures = measures) ## Step 2: Calculate p-value if (p_test == "wilcox") { - # Prepare feature table for calculating Mann-Whitney U test(2 groups only) + # Prepare feature table for calculating Mann-Whitney U test feature_tab_4_MWtest <- extract_metadata_phyloseq(phyloseq, feature) # Extract feature levels feature_0 <- feature_tab_4_MWtest[[feature]] %>% unique() %>% .[1] @@ -48,11 +47,11 @@ plot_alpha_diversity <- function (phyloseq, feature, feature2 = NA, p_value <- wilcox.test(alpha_diversity$data$value ~ feature_tab_4_MWtest[[feature]])$p.value } else if (p_test == "kruskal") { - # Kruskal test(for 2 or more groups) + # Kruskal test p_value <- kruskal.test(alpha_diversity$data$value, factor(alpha_diversity$data[,feature]))$p.value } else { - stop("The input p_test is not supported") + stop("The input p_test is not supported.") } ## Step 3: Plot alpha diversity y <- "value" diff --git a/R/plot_beta_diversity.R b/R/plot_beta_diversity.R index 54972c0..972e9e5 100644 --- a/R/plot_beta_diversity.R +++ b/R/plot_beta_diversity.R @@ -11,17 +11,20 @@ #' @param method The method to calculate beta diversity. Method should be one #' of "bray", "jaccard", "unifrac", "wunifrac". Default is "bray". #' PS: "unifrac" and "wunifrac" require a phylogenetic tree. +#' @param colors A color vector for the plot, the number of colors need to +#' match the number of feature. Default is NULL, if NULL, plot_alpha_diversity +#' will use ggsci::scale_color_jco for the plot. #' @export #' @examples #' plot_beta_diversity(demo_phyloseq_object, feature = "diagnosis") plot_beta_diversity <- function(phyloseq, feature, feature2 = NA, - method = "bray"){ + method = "bray", colors = NULL){ set.seed(99) ## Step 1: Calculate beta diversity if (!method %in% c("bray", "jaccard", "unifrac", "wunifrac")) { - stop('Beta diversity method should be one of "bray", "jaccard", "unifrac", - "wunifrac".') + stop(paste0('Beta diversity method should be one of c("bray", "jaccard", ', + '"unifrac", "wunifrac").')) } else if (method %in% c("unifrac", "wunifrac")) { # Requires phyloseq-class that contains both an otu_table and a # phylogenetic tree @@ -41,7 +44,11 @@ plot_beta_diversity <- function(phyloseq, feature, feature2 = NA, # Join two tables beta_plot <- left_join(PC, metadata) # Print beta-diversity table - select(beta_plot, SampleID, !!feature, PC1, PC2) %>% print() + if (is.na(feature2)) { + select(beta_plot, SampleID, !!feature, PC1, PC2) %>% print() + } else { + select(beta_plot, SampleID, !!feature, !!feature2, PC1, PC2) %>% print() + } ## Step 3: Plot beta diversity # Make x-axis and y-axis names for aes_string x_name <- "PC1" @@ -66,7 +73,6 @@ plot_beta_diversity <- function(phyloseq, feature, feature2 = NA, axis.text.x = element_text(size = 12), legend.text = element_text(size = 12), strip.text.x = element_text(size = 14)) - p + ggsci::scale_color_jco() + ggsci::scale_fill_jco() } else { p <- ggplot(data = beta_plot, # Use aes_string() to pass variables to ggplot @@ -85,6 +91,13 @@ plot_beta_diversity <- function(phyloseq, feature, feature2 = NA, axis.text.x = element_text(size = 12), legend.text = element_text(size = 12), strip.text.x = element_text(size = 14)) + } + if (is.null(colors)) { p + ggsci::scale_color_jco() + ggsci::scale_fill_jco() + } else if (length(colors) != length(unique(beta_plot[[feature]]))) { + stop(paste0("The number of colors and the number of ", feature, + " does not mtach.")) + } else { + p + scale_color_manual(values = colors) } } diff --git a/R/plot_correlation.R b/R/plot_correlation.R index 26f04e1..8aba611 100644 --- a/R/plot_correlation.R +++ b/R/plot_correlation.R @@ -13,8 +13,8 @@ plot_correlation <- function (cor_tab, x, y, method = "pearson") { # Notice: Colnames of the input table can only be letters or numbers. if (any(str_detect(c(x, y), '\\W'))) { - stop("Colnames of the input columns can only contain letters or numbers, - or it can't be recognized when plotting.") + stop(paste0("Colnames of the input columns can only contain letters or', + ' numbers, or it can't be recognized when plotting.")) } if (method == "pearson") { unit <- "Pearson's r" diff --git a/R/plot_sparsity.R b/R/plot_sparsity.R index b09cbac..38a63a9 100644 --- a/R/plot_sparsity.R +++ b/R/plot_sparsity.R @@ -1,29 +1,28 @@ #' plot_sparsity #' -#' plot_sparsity can show the sparsity of an OTU table. It will calculate the -#' prevalence of each OTU in all samples. X-axis is an OTU's prevalence. Y-axis +#' plot_sparsity can show the sparsity of an OTU table. It will calculate the +#' prevalence of each OTU in all samples. X-axis is an OTU's prevalence. Y-axis #' is OTU count. #' -#' @param otu_table An OTU table in phyloseq object format. Rownames are +#' @param otu_table An OTU table in phyloseq object format. Rownames are #' sample ID, colnames are taxa. -#' @param binwidth The width of the bins. Can be specified as a numeric value, -#' or a function that calculates width from x. The default is to use bins bins -#' that cover the range of the data. You should always override this value, -#' exploring multiple widths to find the best to illustrate the stories in your +#' @param binwidth The width of the bins. Can be specified as a numeric value, +#' or a function that calculates width from x. The default is to use bins bins +#' that cover the range of the data. You should always override this value, +#' exploring multiple widths to find the best to illustrate the stories in your #' data. -#' #' @export #' @examples #' plot_sparsity(dada2_res$seq_tab, 5) plot_sparsity <- function(otu_table, binwidth = NA) { - otu_table <- otu_table %>% + otu_table <- otu_table %>% # Protect rownames (Tidyverse will automatically remove rownames) - rownames_to_column() %>% + rownames_to_column() %>% # Remove all 0 OTUs - filter_if(is.numeric, any_vars(. != 0)) %>% + filter_if(is.numeric, any_vars(. != 0)) %>% # Recover rownames - column_to_rownames() %>% + column_to_rownames() %>% # Transposes OTU table t() %>% as.data.frame() # Replace 0 with NA @@ -31,23 +30,23 @@ plot_sparsity <- function(otu_table, binwidth = NA) { # Calculate sparsity otu_table <- apply( otu_table, 1, function(x) round((sum(!is.na(x))/ncol(otu_table))*100, 0) - ) %>% - as.data.frame() %>% + ) %>% + as.data.frame() %>% rownames_to_column(var = 'otu') colnames(otu_table)[2] <- 'prevalence' if (!is.na(binwidth)) { - ggplot(otu_table, aes(prevalence)) + - geom_histogram(binwidth = binwidth) + - xlab('Prevalence of each OTU') + - ylab('Count') + - theme_bw() + + ggplot(otu_table, aes(prevalence)) + + geom_histogram(binwidth = binwidth) + + xlab('Prevalence of each OTU') + + ylab('Count') + + theme_bw() + theme(panel.grid = element_blank()) } else { - ggplot(otu_table, aes(prevalence)) + - geom_histogram() + - xlab('Prevalence of each OTU') + - ylab('Count') + - theme_bw() + + ggplot(otu_table, aes(prevalence)) + + geom_histogram() + + xlab('Prevalence of each OTU') + + ylab('Count') + + theme_bw() + theme(panel.grid = element_blank()) } } diff --git a/R/plot_stacked_bar.R b/R/plot_stacked_bar.R index 997869b..d0692b8 100644 --- a/R/plot_stacked_bar.R +++ b/R/plot_stacked_bar.R @@ -34,24 +34,25 @@ plot_stacked_bar <- function(phyloseq = NULL, level = NA, feature = NA, if (is.null(phyloseq)) { if (is.null(otu_table)) { # If no phyloseq, require otu_table - stop("Argument 'phyloseq' and 'otu_table' are both missing, - please input a phyloseq object or an OTU table and metadata.") + stop(paste0("Argument 'phyloseq' and 'otu_table' are both missing, ", + "please input a phyloseq object or an OTU table and", + " metadata.")) } else { if (is.null(metadata)) { # If have otu_table, require metadata - stop("Argument 'metadata' is missing, please input a metadata for the - OTU table.") + stop(paste0("Argument 'metadata' is missing, please input a metadata", + " for the OTU table.")) } } } else { if (!is.null(otu_table)) { # If have phyloseq, require no otu_table - stop("Argument 'phyloseq' and 'otu_table' are both detected, please input - one of them, not both.") + stop(paste0("Argument 'phyloseq' and 'otu_table' are both detected, ", + "please input one of them, not both.")) } else if (is.na(level)) { # If have phyloseq, require level - stop("Argument 'level' is missing. Plaese choose a taxonomy level to plot - stacked bar.") + stop(paste0("Argument 'level' is missing. Plaese choose a taxonomy ", + "level to plot stacked bar.")) } } # First construct otu then convert to percentage diff --git a/R/track_reads_dada2.R b/R/track_reads_dada2.R index a67a05e..d669f5e 100644 --- a/R/track_reads_dada2.R +++ b/R/track_reads_dada2.R @@ -8,17 +8,16 @@ #' stage in dada2 workflow, Y-axis will be the reads counts. #' #' @param reads_track The reads track data frame from Xbiome 16S pipeline dada2 -#' workflow result. +#' workflow result. #' @param single_end Default is FALSE. If single_end == TRUE, means the -#' sequence files are single end, the x-axis will contain -#' 'input', 'filtered', 'dereplicated', 'nonchim'. If -#' single_end == FALSE, means the sequence files are paired -#' end, the x-axis will contain 'input', 'filtered', -#' 'denoisedF', 'denoisedR', 'merged', 'nonchim'. +#' sequence files are single end, the x-axis will contain 'input', 'filtered', +#' 'dereplicated', 'nonchim'. If single_end == FALSE, means the sequence files +#' are paired end, the x-axis will contain 'input', 'filtered', 'denoisedF', +#' 'denoisedR', 'merged', 'nonchim'. #' @param relative_abundance Default is FALSE. If TRUE, will turn values to -#' relative abundance. +#' relative abundance. #' @param legend_position Legend position. Default is top. One of "none", -#' "left", "right", "bottom", "top". +#' "left", "right", "bottom", "top". #' @export #' @examples #' track_reads_dada2(demo_dada2_result$reads_track, single_end = FALSE) diff --git a/man/construct_lefse_table.Rd b/man/construct_lefse_table.Rd index 3b2c01c..466d883 100644 --- a/man/construct_lefse_table.Rd +++ b/man/construct_lefse_table.Rd @@ -14,11 +14,10 @@ metadata and phylogenetic tree.} table, feature will be the first row.} \item{level}{The coloumn name of the level wanted to select. Default is -"all". If "all" then retain all taxonomy level, else retain the -taxonomy from Kingdom to selected level, drop everything else. -Level name should be one of "all", "Kingdom", "Phylum", "Class", -"Order", "Family", "Genus", "Species". Taxonomy will be -seperated by "|".} +"all". If "all" then retain all taxonomy level, else retain the taxonomy +from Kingdom to selected level, drop everything else. Level name should be +one of c("all", "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", +"Species"). Taxonomy will be seperated by "|".} } \description{ construct_lefse_table can construct a LEfSe-format OTU table. diff --git a/man/construct_otu_table.Rd b/man/construct_otu_table.Rd index e10d41c..5ba875f 100644 --- a/man/construct_otu_table.Rd +++ b/man/construct_otu_table.Rd @@ -11,10 +11,10 @@ construct_otu_table(phyloseq, level = "all") metadata and phylogenetic tree.} \item{level}{The coloumn name of the level wanted to select. Default is -"all". If "all" then retain all taxonomy level and seperate by -"; ", else ONLY retain the given taxonomy level, drop -everything else. Level name should be one of "all", "Kingdom", -"Phylum", "Class", "Order", "Family", "Genus", "Species".} +"all". If "all" then retain all taxonomy level and seperate by "; ", else +ONLY retain the given taxonomy level, drop everything else. Level name +should be one of "all", "Kingdom", "Phylum", "Class", "Order", "Family", +"Genus", "Species".} } \description{ construct_otu_table can construct a OTU table with a phyloseq object. diff --git a/man/convert_to_percentage.Rd b/man/convert_to_percentage.Rd index c3f46f7..13eb4c4 100644 --- a/man/convert_to_percentage.Rd +++ b/man/convert_to_percentage.Rd @@ -10,10 +10,9 @@ convert_to_percentage(df, row_sum = TRUE) \item{df}{a input data frame.} \item{row_sum}{Default is TRUE. If row_sum == TRUE, then will take every -value of a row and divide by the summary of this row, and -apply this to every row. If row_sum == FALSE, then will take -every value of a column and divide by the summary of this -column, and apply this to every column.} +value of a row and divide by the summary of this row, and apply this to +every row. If row_sum == FALSE, then will take every value of a column and +divide by the summary of this column, and apply this to every column.} } \description{ convert_to_percentage can convert a data frame to percentage. diff --git a/man/extract_metadata_phyloseq.Rd b/man/extract_metadata_phyloseq.Rd index 25331c9..5d3a17a 100644 --- a/man/extract_metadata_phyloseq.Rd +++ b/man/extract_metadata_phyloseq.Rd @@ -11,8 +11,8 @@ extract_metadata_phyloseq(phyloseq, feature = NA) metadata and phylogenetic tree.} \item{feature}{The column name of the feature you want to select. Default is -NA. If NA, will return the complete metadata, else will -return subject id and feature column that's given.} +NA. If NA, will return the complete metadata, else will return subject id +and feature column that's given.} } \description{ extract_metadata_phyloseq can extract metadata from a phyloseq object. diff --git a/man/log2fc.Rd b/man/log2fc.Rd index a1797b5..bc6e010 100644 --- a/man/log2fc.Rd +++ b/man/log2fc.Rd @@ -11,12 +11,12 @@ log2fc(phyloseq, feature, level = NA, p_value = 0.05, save_res = FALSE, \item{phyloseq}{A phyloseq object contain otu table, taxonomy table, sample metadata and phylogenetic tree.} -\item{feature}{The column name of the feature you want to select from metadata, -e.g. "Phenotype".} +\item{feature}{The column name of the feature you want to select from +metadata, e.g. "Phenotype".} -\item{level}{Which taxonomy level to calculate fold change. Default is NA. If -level is given, will use construct_otu_table function to construct -OTU table, and use DESeq to calculate fold change.} +\item{level}{Which taxonomy level to calculate fold change. Default is NA. +If level is given, will use construct_otu_table function to construct OTU +table, and use DESeq to calculate fold change.} \item{p_value}{The cut off P value for the fold change. Default is 0.05.} diff --git a/man/plot_alpha_diversity.Rd b/man/plot_alpha_diversity.Rd index c074429..969eb3a 100644 --- a/man/plot_alpha_diversity.Rd +++ b/man/plot_alpha_diversity.Rd @@ -15,17 +15,16 @@ metadata and phylogenetic tree.} metadata.} \item{feature2}{The column name of another feature you want to select from -metadata, e.g. "Gender", which will make the plots draw in -different shapes. Default is NA.} +metadata, e.g. "Gender", which will make the plots draw in different shapes. +Default is NA.} \item{measures}{The measures to calculate alpha diversity. Default is NA. If -NA, all available alpha diversity measures will be -calculated and generate a table. If not NA, measures -should be one of "Observed", "Chao1", "ACE", "Shannon", -"Simpson", "InvSimpson", "Fisher".} +NA, all available alpha diversity measures will be calculated and generate a +table. If not NA, measures should be one of c("Observed", "Chao1", "ACE", +"Shannon", "Simpson", "InvSimpson", "Fisher").} \item{p_test}{The p-value to test alpha diversity. p_test should be either -"wilcox" or "kruskal".} +"wilcox" or "kruskal". PS: "wilcox" can only work with two groups.} } \description{ This is a function for plotting alpha diversity. diff --git a/man/plot_beta_diversity.Rd b/man/plot_beta_diversity.Rd index 1e86c58..6c38ec6 100644 --- a/man/plot_beta_diversity.Rd +++ b/man/plot_beta_diversity.Rd @@ -4,7 +4,8 @@ \alias{plot_beta_diversity} \title{plot_beta_diversity} \usage{ -plot_beta_diversity(phyloseq, feature, feature2 = NA, method = "bray") +plot_beta_diversity(phyloseq, feature, feature2 = NA, method = "bray", + colors = NULL) } \arguments{ \item{phyloseq}{A phyloseq object contain otu table, taxonomy table, sample @@ -19,6 +20,10 @@ metadata, which will show in different shape, e.g. "Gender". Default is NA.} \item{method}{The method to calculate beta diversity. Method should be one of "bray", "jaccard", "unifrac", "wunifrac". Default is "bray". PS: "unifrac" and "wunifrac" require a phylogenetic tree.} + +\item{colors}{A color vector for the plot, the number of colors need to +match the number of feature. Default is NULL, if NULL, plot_alpha_diversity +will use ggsci::scale_color_jco for the plot.} } \description{ This is a function for plotting beta diversity. diff --git a/man/plot_sparsity.Rd b/man/plot_sparsity.Rd index 64a588d..4c95f8c 100644 --- a/man/plot_sparsity.Rd +++ b/man/plot_sparsity.Rd @@ -7,18 +7,18 @@ plot_sparsity(otu_table, binwidth = NA) } \arguments{ -\item{otu_table}{An OTU table in phyloseq object format. Rownames are +\item{otu_table}{An OTU table in phyloseq object format. Rownames are sample ID, colnames are taxa.} -\item{binwidth}{The width of the bins. Can be specified as a numeric value, -or a function that calculates width from x. The default is to use bins bins -that cover the range of the data. You should always override this value, -exploring multiple widths to find the best to illustrate the stories in your +\item{binwidth}{The width of the bins. Can be specified as a numeric value, +or a function that calculates width from x. The default is to use bins bins +that cover the range of the data. You should always override this value, +exploring multiple widths to find the best to illustrate the stories in your data.} } \description{ -plot_sparsity can show the sparsity of an OTU table. It will calculate the -prevalence of each OTU in all samples. X-axis is an OTU's prevalence. Y-axis +plot_sparsity can show the sparsity of an OTU table. It will calculate the +prevalence of each OTU in all samples. X-axis is an OTU's prevalence. Y-axis is OTU count. } \examples{ diff --git a/man/track_reads_dada2.Rd b/man/track_reads_dada2.Rd index 4a88edc..86b3031 100644 --- a/man/track_reads_dada2.Rd +++ b/man/track_reads_dada2.Rd @@ -12,11 +12,10 @@ track_reads_dada2(reads_track, single_end = FALSE, workflow result.} \item{single_end}{Default is FALSE. If single_end == TRUE, means the -sequence files are single end, the x-axis will contain -'input', 'filtered', 'dereplicated', 'nonchim'. If -single_end == FALSE, means the sequence files are paired -end, the x-axis will contain 'input', 'filtered', -'denoisedF', 'denoisedR', 'merged', 'nonchim'.} +sequence files are single end, the x-axis will contain 'input', 'filtered', +'dereplicated', 'nonchim'. If single_end == FALSE, means the sequence files +are paired end, the x-axis will contain 'input', 'filtered', 'denoisedF', +'denoisedR', 'merged', 'nonchim'.} \item{relative_abundance}{Default is FALSE. If TRUE, will turn values to relative abundance.}