diff --git a/.Rproj.user/E4C83D23/console06/INDEX001 b/.Rproj.user/E4C83D23/console06/INDEX001
index daaab50..dd37ffc 100644
--- a/.Rproj.user/E4C83D23/console06/INDEX001
+++ b/.Rproj.user/E4C83D23/console06/INDEX001
@@ -1 +1 @@
-[{"allow_restart":true,"alt_buffer":false,"autoclose":1,"buffered_output":"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n","caption":"Terminal 1","channel_id":"4185","channel_mode":1,"child_procs":false,"cols":87,"cwd":"~/Repositary/visual16S","exit_code":1,"handle":"57531550","interaction_mode":2,"max_output_lines":1000,"restarted":true,"rows":60,"shell_type":7,"show_on_output":false,"terminal_sequence":1,"title":"/home/yeguanhua/Repositary/visual16S","track_env":true,"zombie":false}]
\ No newline at end of file
+[{"allow_restart":true,"alt_buffer":false,"autoclose":1,"buffered_output":"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n","caption":"Terminal 1","channel_id":"4977","channel_mode":1,"child_procs":false,"cols":87,"cwd":"~/Repositary/visual16S","exit_code":null,"handle":"57531550","interaction_mode":2,"max_output_lines":1000,"restarted":true,"rows":60,"shell_type":7,"show_on_output":false,"terminal_sequence":1,"title":"yeguanhua@XBIOME-SERVER-CPU: ~/Repositary/visual16S","track_env":true,"zombie":false}]
\ No newline at end of file
diff --git a/.Rproj.user/E4C83D23/pcs/files-pane.pper b/.Rproj.user/E4C83D23/pcs/files-pane.pper
index ff8e7d9..55ef08c 100644
--- a/.Rproj.user/E4C83D23/pcs/files-pane.pper
+++ b/.Rproj.user/E4C83D23/pcs/files-pane.pper
@@ -1,5 +1,5 @@
 {
-    "path" : "~/Repositary/visual16S",
+    "path" : "~/xviz",
     "sortOrder" : [
         {
             "ascending" : true,
diff --git a/.Rproj.user/E4C83D23/pcs/source-pane.pper b/.Rproj.user/E4C83D23/pcs/source-pane.pper
index 9efb51b..70829f6 100644
--- a/.Rproj.user/E4C83D23/pcs/source-pane.pper
+++ b/.Rproj.user/E4C83D23/pcs/source-pane.pper
@@ -1,3 +1,3 @@
 {
-    "activeTab" : -1
+    "activeTab" : 1
 }
\ No newline at end of file
diff --git a/.Rproj.user/E4C83D23/pcs/windowlayoutstate.pper b/.Rproj.user/E4C83D23/pcs/windowlayoutstate.pper
index e273a22..64b861b 100644
--- a/.Rproj.user/E4C83D23/pcs/windowlayoutstate.pper
+++ b/.Rproj.user/E4C83D23/pcs/windowlayoutstate.pper
@@ -1,13 +1,13 @@
 {
     "left" : {
         "panelheight" : 1325,
-        "splitterpos" : 546,
-        "topwindowstate" : "HIDE",
+        "splitterpos" : 543,
+        "topwindowstate" : "NORMAL",
         "windowheight" : 1379
     },
     "right" : {
         "panelheight" : 1325,
-        "splitterpos" : 543,
+        "splitterpos" : 542,
         "topwindowstate" : "NORMAL",
         "windowheight" : 1379
     }
diff --git a/.Rproj.user/E4C83D23/pcs/workbench-pane.pper b/.Rproj.user/E4C83D23/pcs/workbench-pane.pper
index b15bf23..0e24b84 100644
--- a/.Rproj.user/E4C83D23/pcs/workbench-pane.pper
+++ b/.Rproj.user/E4C83D23/pcs/workbench-pane.pper
@@ -1,5 +1,5 @@
 {
-    "TabSet1" : 3,
+    "TabSet1" : 0,
     "TabSet2" : 0,
     "TabZoom" : {
     }
diff --git a/.Rproj.user/E4C83D23/persistent-state b/.Rproj.user/E4C83D23/persistent-state
index d2e6b78..16ee712 100644
--- a/.Rproj.user/E4C83D23/persistent-state
+++ b/.Rproj.user/E4C83D23/persistent-state
@@ -1,8 +1,8 @@
 build-last-errors="[]"
-build-last-errors-base-dir="~/Repositary/visual16S/"
-build-last-outputs="[{\"output\":\"==> R CMD INSTALL --no-multiarch --with-keep.source visual16S\\n\\n\",\"type\":0},{\"output\":\"No protocol specified\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"* installing to library ‘/home/yeguanhua/R/x86_64-pc-linux-gnu-library/3.5’\\n\",\"type\":1},{\"output\":\"* installing *source* package ‘visual16S’ ...\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** R\\n\",\"type\":1},{\"output\":\"** data\\n\",\"type\":1},{\"output\":\"*** moving datasets to lazyload DB\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** byte-compile and prepare package for lazy loading\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** help\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"*** installing help indices\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"** building package indices\\n\",\"type\":1},{\"output\":\"** testing if installed package can be loaded\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"No protocol specified\\n\",\"type\":1},{\"output\":\"\",\"type\":1},{\"output\":\"* DONE (visual16S)\\n\",\"type\":1},{\"output\":\"\",\"type\":1}]"
+build-last-errors-base-dir=""
+build-last-outputs="[]"
 compile_pdf_state="{\"errors\":[],\"output\":\"\",\"running\":false,\"tab_visible\":false,\"target_file\":\"\"}"
-files.monitored-path=""
+files.monitored-path="~/BJCancer/analysis"
 find-in-files-state="{\"handle\":\"\",\"input\":\"\",\"path\":\"\",\"regex\":true,\"results\":{\"file\":[],\"line\":[],\"lineValue\":[],\"matchOff\":[],\"matchOn\":[]},\"running\":false}"
 imageDirtyState="1"
 saveActionState="0"
diff --git a/.Rproj.user/E4C83D23/sources/prop/INDEX b/.Rproj.user/E4C83D23/sources/prop/INDEX
index 1cf0e4b..3373793 100644
--- a/.Rproj.user/E4C83D23/sources/prop/INDEX
+++ b/.Rproj.user/E4C83D23/sources/prop/INDEX
@@ -1,14 +1,28 @@
+~%2FBJCancer%2FMJ%2FMJ20190403%2Fanalysis%2FMJ_190403_analysis.Rmd="2E7B383F"
 ~%2FBJCancer%2FMJ%2Fanalysis%2Fanalysis_mj2.Rmd="3715EBC3"
 ~%2FBJCancer%2FMJ20190403011%2Fanalysis%2FMJ_190403_analysis.Rmd="5616A48D"
+~%2FBJCancer%2Fanalysis%2Fanalysis.R="312F76A"
+~%2FBJCancer%2Fanalysis%2Fsubject_extract.R="46FD7822"
+~%2FBJCancer%2Fmetadata%2Fprocessing.R="298A5E2"
 ~%2FR%2F16S-analysis-visualization%2FDESCRIPTION="8FFE7823"
 ~%2FR%2F16S-analysis-visualization%2FREADME.Rmd="5DA43DB2"
 ~%2FR%2FXbiome16SVisualization%2FDESCRIPTION="EAFED69B"
+~%2FRepositary%2F16S_analysis_functions_Yip.R="C94066A8"
 ~%2FRepositary%2Fvisual16S%2FDESCRIPTION="718DAA5C"
 ~%2FRepositary%2Fvisual16S%2FR%2Falpha_diversity_plot.R="AD67D71C"
+~%2FRepositary%2Fvisual16S%2FR%2Fconstruct_lefse_table.R="E2E70350"
+~%2FRepositary%2Fvisual16S%2FR%2Fconstruct_otu_table.R="9F3D6689"
+~%2FRepositary%2Fvisual16S%2FR%2Flog2fc.R="80BEE29D"
+~%2FRepositary%2Fvisual16S%2FR%2Fplot_alpha_diversity.R="C34039C6"
+~%2FRepositary%2Fvisual16S%2FR%2Fplot_beta_diversity.R="2E6CF992"
+~%2FRepositary%2Fvisual16S%2FR%2Fplot_correlation.R="C6A8435F"
+~%2FRepositary%2Fvisual16S%2FR%2Fplot_sparsity.R="B8AEF7F3"
 ~%2FRepositary%2Fvisual16S%2FR%2Fplot_stacked_bar.R="9D821D65"
+~%2FRepositary%2Fvisual16S%2FR%2Ftrack_reads_dada2.R="D1F8C44E"
 ~%2FRepositary%2Fvisual16S%2FREADME.Rmd="B4C7C8F"
 ~%2FRepositary%2Fvisual16S%2FREADME.md="CDBA173"
 ~%2FRepositary%2Fvisualization416S%2FREADME.Rmd="A92803F3"
+~%2Fxviz%2FR%2Fconstruct_otu_table.R="844B86E3"
 ~%2Fxviz%2FR%2Fplot_alpha_diversity.R="27BC8921"
 ~%2Fxviz%2FR%2Fplot_beta_diversity.R="E3BEF9E6"
 ~%2Fxviz%2FR%2Fplot_stacked_bar.R="BDEA371A"
diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths
index 9c1a82f..cb77031 100644
--- a/.Rproj.user/shared/notebooks/paths
+++ b/.Rproj.user/shared/notebooks/paths
@@ -1,7 +1,21 @@
+/home/yeguanhua/BJCancer/MJ/MJ20190403/analysis/MJ_190403_analysis.Rmd="960A7DF"
+/home/yeguanhua/BJCancer/analysis/analysis.R="C933209"
+/home/yeguanhua/BJCancer/analysis/subject_extract.R="C8C14983"
+/home/yeguanhua/BJCancer/metadata/processing.R="5E210D39"
+/home/yeguanhua/Repositary/16S_analysis_functions_Yip.R="193FF21C"
 /home/yeguanhua/Repositary/visual16S/DESCRIPTION="D6FAFC4C"
+/home/yeguanhua/Repositary/visual16S/R/construct_lefse_table.R="FE82BBBF"
+/home/yeguanhua/Repositary/visual16S/R/construct_otu_table.R="1CB7F17F"
+/home/yeguanhua/Repositary/visual16S/R/log2fc.R="25C0CF72"
+/home/yeguanhua/Repositary/visual16S/R/plot_alpha_diversity.R="80616A8C"
+/home/yeguanhua/Repositary/visual16S/R/plot_beta_diversity.R="CEFFBA5"
+/home/yeguanhua/Repositary/visual16S/R/plot_correlation.R="8C1009F"
+/home/yeguanhua/Repositary/visual16S/R/plot_sparsity.R="4BF1C0A9"
 /home/yeguanhua/Repositary/visual16S/R/plot_stacked_bar.R="C1BF12DA"
+/home/yeguanhua/Repositary/visual16S/R/track_reads_dada2.R="B6F5D662"
 /home/yeguanhua/Repositary/visual16S/README.Rmd="EFE45572"
 /home/yeguanhua/Repositary/visual16S/README.md="802CD8B6"
+/home/yeguanhua/xviz/R/construct_otu_table.R="7A12CFBD"
 /home/yeguanhua/xviz/R/plot_alpha_diversity.R="33D2C56B"
 /home/yeguanhua/xviz/R/plot_beta_diversity.R="41254790"
 /home/yeguanhua/xviz/R/plot_stacked_bar.R="6591DBD3"
diff --git a/R/construct_lefse_table.R b/R/construct_lefse_table.R
index 9558e94..c175ef1 100644
--- a/R/construct_lefse_table.R
+++ b/R/construct_lefse_table.R
@@ -5,15 +5,14 @@
 #' Format Data for LEfSe for more details.
 #'
 #' @param phyloseq A phyloseq object contain otu table, taxonomy table, sample
-#'                 metadata and phylogenetic tree.
+#' metadata and phylogenetic tree.
 #' @param feature The column name of the feature you want to select. In final
-#'                table, feature will be the first row.
+#' table, feature will be the first row.
 #' @param level The coloumn name of the level wanted to select. Default is
-#'              "all". If "all" then retain all taxonomy level, else retain the
-#'              taxonomy from Kingdom to selected level, drop everything else.
-#'              Level name should be one of "all", "Kingdom", "Phylum", "Class",
-#'              "Order", "Family", "Genus", "Species". Taxonomy will be
-#'              seperated by "|".
+#' "all". If "all" then retain all taxonomy level, else retain the taxonomy
+#' from Kingdom to selected level, drop everything else. Level name should be
+#' one of c("all", "Kingdom", "Phylum", "Class", "Order", "Family", "Genus",
+#' "Species"). Taxonomy will be separated by "|".
 #' @export
 #' @examples
 #' construct_lefse_table(demo_phyloseq_object, feature = "diagnosis",
diff --git a/R/construct_otu_table.R b/R/construct_otu_table.R
index 0adbc32..38f6733 100644
--- a/R/construct_otu_table.R
+++ b/R/construct_otu_table.R
@@ -3,12 +3,12 @@
 #' construct_otu_table can construct a OTU table with a phyloseq object.
 #'
 #' @param phyloseq A phyloseq object contain otu table, taxonomy table, sample
-#'                 metadata and phylogenetic tree.
+#' metadata and phylogenetic tree.
 #' @param level The coloumn name of the level wanted to select. Default is
-#'              "all". If "all" then retain all taxonomy level and seperate by
-#'              "; ", else ONLY retain the given taxonomy level, drop
-#'              everything else. Level name should be one of "all", "Kingdom",
-#'              "Phylum", "Class", "Order", "Family", "Genus", "Species".
+#' "all". If "all" then retain all taxonomy level and separate by "; ", else
+#' ONLY retain the given taxonomy level, drop everything else. Level name
+#' should be one of "all", "Kingdom", "Phylum", "Class", "Order", "Family",
+#' "Genus", "Species".
 #' @export
 #' @examples
 #' construct_otu_table(demo_phyloseq_object, level = "Genus") %>% .[,1:5]
@@ -19,8 +19,8 @@ construct_otu_table <- function(phyloseq, level = "all") {
   # Check if input 'level' is correct
   if (!level %in% c("all", "Kingdom", "Phylum", "Class", "Order", "Family",
                     "Genus", "Species")) {
-    stop('level should be one of "all", "Kingdom", "Phylum", "Class", "Order",
-         "Family", "Genus", "Species".')}
+    stop(paste0('Argument "level" should be one of c("all", "Kingdom", ',
+                '"Phylum", "Class", "Order", "Family", "Genus", "Species").'))}
   # Read in sequence table and taxonomy table from phyloseq
   otu <- otu_table(phyloseq) %>% as.data.frame() %>% t() %>%
     as.data.frame() %>% rownames_to_column(var = "OTU_ID")
diff --git a/R/convert_to_percentage.R b/R/convert_to_percentage.R
index 03ec0a5..6002d24 100644
--- a/R/convert_to_percentage.R
+++ b/R/convert_to_percentage.R
@@ -4,10 +4,9 @@
 #'
 #' @param df a input data frame.
 #' @param row_sum Default is TRUE. If row_sum == TRUE, then will take every
-#'                value of a row and divide by the summary of this row, and
-#'                apply this to every row. If row_sum == FALSE, then will take
-#'                every value of a column and divide by the summary of this
-#'                column, and apply this to every column.
+#' value of a row and divide by the summary of this row, and apply this to
+#' every row. If row_sum == FALSE, then will take every value of a column and
+#' divide by the summary of this column, and apply this to every column.
 #' @export
 #' @examples
 #' convert_to_percentage(demo_dada2_result$seq_tab, row_sum = TRUE) %>% .[,1:5]
diff --git a/R/extract_metadata_phyloseq.R b/R/extract_metadata_phyloseq.R
index 26403d8..05a9a6c 100644
--- a/R/extract_metadata_phyloseq.R
+++ b/R/extract_metadata_phyloseq.R
@@ -8,10 +8,10 @@
 #' column.
 #'
 #' @param phyloseq A phyloseq object contain otu table, taxonomy table, sample
-#'                 metadata and phylogenetic tree.
+#' metadata and phylogenetic tree.
 #' @param feature The column name of the feature you want to select. Default is
-#'                NA. If NA, will return the complete metadata, else will
-#'                return subject id and feature column that's given.
+#' NA. If NA, will return the complete metadata, else will return subject id
+#' and feature column that's given.
 #' @export
 #' @examples
 #' extract_metadata_phyloseq(demo_phyloseq_object)
diff --git a/R/log2fc.R b/R/log2fc.R
index 7cbe50b..497db05 100644
--- a/R/log2fc.R
+++ b/R/log2fc.R
@@ -3,16 +3,15 @@
 #' This is a function for plotting log2 fold change.
 #'
 #' @param phyloseq A phyloseq object contain otu table, taxonomy table, sample
-#'                 metadata and phylogenetic tree.
-#' @param feature The column name of the feature you want to select from metadata,
-#'                e.g. "Phenotype".
-#' @param level Which taxonomy level to calculate fold change. Default is NA. If
-#'              level is given, will use construct_otu_table function to construct
-#'              OTU table, and use DESeq to calculate fold change.
-#'
+#' metadata and phylogenetic tree.
+#' @param feature The column name of the feature you want to select from
+#' metadata, e.g. "Phenotype".
+#' @param level Which taxonomy level to calculate fold change. Default is NA.
+#' If level is given, will use construct_otu_table function to construct OTU
+#' table, and use DESeq to calculate fold change.
 #' @param p_value The cut off P value for the fold change. Default is 0.05.
 #' @param save_res Default is FALSE. If TRUE, will save original result
-#'                 DESeq2_result.rds to current working directory.
+#' DESeq2_result.rds to current working directory.
 #' @param reference The control group. Default is NA.
 #' @param treatment The treatment group. Default is NA.
 #' @export
diff --git a/R/plot_alpha_diversity.R b/R/plot_alpha_diversity.R
index d69c448..04e6c48 100644
--- a/R/plot_alpha_diversity.R
+++ b/R/plot_alpha_diversity.R
@@ -3,19 +3,18 @@
 #' This is a function for plotting alpha diversity.
 #'
 #' @param phyloseq A phyloseq object contain otu table, taxonomy table, sample
-#'                 metadata and phylogenetic tree.
+#' metadata and phylogenetic tree.
 #' @param feature The column name of the feature you want to select from
-#'                metadata.
+#' metadata.
 #' @param feature2 The column name of another feature you want to select from
-#'                metadata, e.g. "Gender", which will make the plots draw in
-#'                different shapes. Default is NA.
+#' metadata, e.g. "Gender", which will make the plots draw in different shapes.
+#' Default is NA.
 #' @param measures The measures to calculate alpha diversity. Default is NA. If
-#'                 NA, all available alpha diversity measures will be
-#'                 calculated and generate a table. If not NA, measures
-#'                 should be one of "Observed", "Chao1", "ACE", "Shannon",
-#'                 "Simpson", "InvSimpson", "Fisher".
+#' NA, all available alpha diversity measures will be calculated and generate a
+#' table. If not NA, measures should be one of c("Observed", "Chao1", "ACE",
+#' "Shannon", "Simpson", "InvSimpson", "Fisher").
 #' @param p_test The p-value to test alpha diversity. p_test should be either
-#'               "wilcox" or "kruskal".
+#' "wilcox" or "kruskal". PS: "wilcox" can only work with two groups.
 #' @export
 #' @examples
 #' plot_alpha_diversity(demo_phyloseq_object, feature = "diagnosis",
@@ -27,15 +26,15 @@ plot_alpha_diversity <- function (phyloseq, feature, feature2 = NA,
   if (!is.na(measures)) {
     if (!measures %in% c("Observed", "Chao1", "ACE", "Shannon", "Simpson",
                          "InvSimpson", "Fisher")) {
-      stop('measures should be one of c("Observed", "Chao1", "ACE", "Shannon",
-           "Simpson", "InvSimpson", "Fisher").')
+      stop(paste0('Argument "measures" should be one of c("Observed", "Chao1"',
+                  ', "ACE", "Shannon", "Simpson", "InvSimpson", "Fisher").'))
     } else {
       ## Step 1: Use plot_richness function to calculate alpha diversity
       alpha_diversity <- plot_richness(phyloseq, x = feature,
                                        measures = measures)
       ## Step 2: Calculate p-value
       if (p_test == "wilcox") {
-        # Prepare feature table for calculating Mann-Whitney U test(2 groups only)
+        # Prepare feature table for calculating Mann-Whitney U test
         feature_tab_4_MWtest <- extract_metadata_phyloseq(phyloseq, feature)
         # Extract feature levels
         feature_0 <- feature_tab_4_MWtest[[feature]] %>% unique() %>% .[1]
@@ -48,11 +47,11 @@ plot_alpha_diversity <- function (phyloseq, feature, feature2 = NA,
         p_value <- wilcox.test(alpha_diversity$data$value ~
                                  feature_tab_4_MWtest[[feature]])$p.value
       } else if (p_test == "kruskal") {
-        # Kruskal test(for 2 or more groups)
+        # Kruskal test
         p_value <- kruskal.test(alpha_diversity$data$value,
                                 factor(alpha_diversity$data[,feature]))$p.value
       } else {
-        stop("The input p_test is not supported")
+        stop("The input p_test is not supported.")
       }
       ## Step 3: Plot alpha diversity
       y <- "value"
diff --git a/R/plot_beta_diversity.R b/R/plot_beta_diversity.R
index 54972c0..972e9e5 100644
--- a/R/plot_beta_diversity.R
+++ b/R/plot_beta_diversity.R
@@ -11,17 +11,20 @@
 #' @param method The method to calculate beta diversity. Method should be one
 #' of "bray", "jaccard", "unifrac", "wunifrac". Default is "bray".
 #' PS: "unifrac" and "wunifrac" require a phylogenetic tree.
+#' @param colors A color vector for the plot. The number of colors needs to
+#' match the number of levels in feature. Default is NULL; if NULL,
+#' plot_beta_diversity will use ggsci::scale_color_jco for the plot.
 #' @export
 #' @examples
 #' plot_beta_diversity(demo_phyloseq_object, feature = "diagnosis")
 
 plot_beta_diversity <- function(phyloseq, feature, feature2 = NA,
-                                method = "bray"){
+                                method = "bray", colors = NULL){
   set.seed(99)
   ## Step 1: Calculate beta diversity
   if (!method %in% c("bray", "jaccard", "unifrac", "wunifrac")) {
-    stop('Beta diversity method should be one of "bray", "jaccard", "unifrac",
-         "wunifrac".')
+    stop(paste0('Beta diversity method should be one of c("bray", "jaccard", ',
+                '"unifrac", "wunifrac").'))
   } else if (method %in% c("unifrac", "wunifrac")) {
     # Requires phyloseq-class that contains both an otu_table and a
     # phylogenetic tree
@@ -41,7 +44,11 @@ plot_beta_diversity <- function(phyloseq, feature, feature2 = NA,
   # Join two tables
   beta_plot <- left_join(PC, metadata)
   # Print beta-diversity table
-  select(beta_plot, SampleID, !!feature, PC1, PC2) %>% print()
+  if (is.na(feature2)) {
+    select(beta_plot, SampleID, !!feature, PC1, PC2) %>% print()
+  } else {
+    select(beta_plot, SampleID, !!feature, !!feature2, PC1, PC2) %>% print()
+  }
   ## Step 3: Plot beta diversity
   # Make x-axis and y-axis names for aes_string
   x_name <- "PC1"
@@ -66,7 +73,6 @@ plot_beta_diversity <- function(phyloseq, feature, feature2 = NA,
             axis.text.x = element_text(size = 12),
             legend.text = element_text(size = 12),
             strip.text.x = element_text(size = 14))
-    p + ggsci::scale_color_jco() + ggsci::scale_fill_jco()
   } else {
     p <- ggplot(data = beta_plot,
                 # Use aes_string() to pass variables to ggplot
@@ -85,6 +91,13 @@ plot_beta_diversity <- function(phyloseq, feature, feature2 = NA,
             axis.text.x = element_text(size = 12),
             legend.text = element_text(size = 12),
             strip.text.x = element_text(size = 14))
+  }
+  if (is.null(colors)) {
     p + ggsci::scale_color_jco() + ggsci::scale_fill_jco()
+  } else if (length(colors) != length(unique(beta_plot[[feature]]))) {
+    stop(paste0("The number of colors and the number of ", feature,
+                " do not match."))
+  } else {
+    p + scale_color_manual(values = colors)
   }
 }
diff --git a/R/plot_correlation.R b/R/plot_correlation.R
index 26f04e1..8aba611 100644
--- a/R/plot_correlation.R
+++ b/R/plot_correlation.R
@@ -13,8 +13,8 @@
 plot_correlation <- function (cor_tab, x, y, method = "pearson") {
   # Notice: Colnames of the input table can only be letters or numbers.
   if (any(str_detect(c(x, y), '\\W'))) {
-    stop("Colnames of the input columns can only contain letters or numbers,
-         or it can't be recognized when plotting.")
+    stop(paste0("Colnames of the input columns can only contain letters or",
+                " numbers, or it can't be recognized when plotting."))
   }
   if (method == "pearson") {
     unit <- "Pearson's r"
diff --git a/R/plot_sparsity.R b/R/plot_sparsity.R
index b09cbac..38a63a9 100644
--- a/R/plot_sparsity.R
+++ b/R/plot_sparsity.R
@@ -1,29 +1,28 @@
 #' plot_sparsity
 #'
-#' plot_sparsity can show the sparsity of an OTU table. It will calculate the 
-#' prevalence of each OTU in all samples. X-axis is an OTU's prevalence. Y-axis 
+#' plot_sparsity can show the sparsity of an OTU table. It will calculate the
+#' prevalence of each OTU in all samples. X-axis is an OTU's prevalence. Y-axis
 #' is OTU count.
 #'
-#' @param otu_table An OTU table in phyloseq object format. Rownames are 
+#' @param otu_table An OTU table in phyloseq object format. Rownames are
 #' sample ID, colnames are taxa.
-#' @param binwidth The width of the bins. Can be specified as a numeric value, 
-#' or a function that calculates width from x. The default is to use bins bins 
-#' that cover the range of the data. You should always override this value, 
-#' exploring multiple widths to find the best to illustrate the stories in your 
+#' @param binwidth The width of the bins. Can be specified as a numeric value,
+#' or a function that calculates width from x. The default is to use bins bins
+#' that cover the range of the data. You should always override this value,
+#' exploring multiple widths to find the best to illustrate the stories in your
 #' data.
-#' 
 #' @export
 #' @examples
 #' plot_sparsity(dada2_res$seq_tab, 5)
 
 plot_sparsity <- function(otu_table, binwidth = NA) {
-  otu_table <- otu_table %>% 
+  otu_table <- otu_table %>%
     # Protect rownames (Tidyverse will automatically remove rownames)
-    rownames_to_column() %>% 
+    rownames_to_column() %>%
     # Remove all 0 OTUs
-    filter_if(is.numeric, any_vars(. != 0)) %>% 
+    filter_if(is.numeric, any_vars(. != 0)) %>%
     # Recover rownames
-    column_to_rownames() %>% 
+    column_to_rownames() %>%
     # Transposes OTU table
     t() %>% as.data.frame()
   # Replace 0 with NA
@@ -31,23 +30,23 @@ plot_sparsity <- function(otu_table, binwidth = NA) {
   # Calculate sparsity
   otu_table <- apply(
     otu_table, 1, function(x) round((sum(!is.na(x))/ncol(otu_table))*100, 0)
-    ) %>% 
-    as.data.frame() %>% 
+    ) %>%
+    as.data.frame() %>%
     rownames_to_column(var = 'otu')
   colnames(otu_table)[2] <- 'prevalence'
   if (!is.na(binwidth)) {
-    ggplot(otu_table, aes(prevalence)) + 
-      geom_histogram(binwidth = binwidth) + 
-      xlab('Prevalence of each OTU') + 
-      ylab('Count') + 
-      theme_bw() + 
+    ggplot(otu_table, aes(prevalence)) +
+      geom_histogram(binwidth = binwidth) +
+      xlab('Prevalence of each OTU') +
+      ylab('Count') +
+      theme_bw() +
       theme(panel.grid = element_blank())
   } else {
-    ggplot(otu_table, aes(prevalence)) + 
-      geom_histogram() + 
-      xlab('Prevalence of each OTU') + 
-      ylab('Count') + 
-      theme_bw() + 
+    ggplot(otu_table, aes(prevalence)) +
+      geom_histogram() +
+      xlab('Prevalence of each OTU') +
+      ylab('Count') +
+      theme_bw() +
       theme(panel.grid = element_blank())
   }
 }
diff --git a/R/plot_stacked_bar.R b/R/plot_stacked_bar.R
index 997869b..d0692b8 100644
--- a/R/plot_stacked_bar.R
+++ b/R/plot_stacked_bar.R
@@ -34,24 +34,25 @@ plot_stacked_bar <- function(phyloseq = NULL, level = NA, feature = NA,
   if (is.null(phyloseq)) {
     if (is.null(otu_table)) {
       # If no phyloseq, require otu_table
-      stop("Argument 'phyloseq' and 'otu_table' are both missing,
-           please input a phyloseq object or an OTU table and metadata.")
+      stop(paste0("Argument 'phyloseq' and 'otu_table' are both missing, ",
+                  "please input a phyloseq object or an OTU table and",
+                  " metadata."))
     } else {
       if (is.null(metadata)) {
         # If have otu_table, require metadata
-        stop("Argument 'metadata' is missing, please input a metadata for the
-             OTU table.")
+        stop(paste0("Argument 'metadata' is missing, please input a metadata",
+                    " for the OTU table."))
       }
     }
   } else {
     if (!is.null(otu_table)) {
       # If have phyloseq, require no otu_table
-      stop("Argument 'phyloseq' and 'otu_table' are both detected, please input
-           one of them, not both.")
+      stop(paste0("Argument 'phyloseq' and 'otu_table' are both detected, ",
+                  "please input one of them, not both."))
     } else if (is.na(level)) {
       # If have phyloseq, require level
-      stop("Argument 'level' is missing. Plaese choose a taxonomy level to plot
-           stacked bar.")
+      stop(paste0("Argument 'level' is missing. Please choose a taxonomy ",
+                  "level to plot stacked bar."))
     }
   }
   # First construct otu then convert to percentage
diff --git a/R/track_reads_dada2.R b/R/track_reads_dada2.R
index a67a05e..d669f5e 100644
--- a/R/track_reads_dada2.R
+++ b/R/track_reads_dada2.R
@@ -8,17 +8,16 @@
 #' stage in dada2 workflow, Y-axis will be the reads counts.
 #'
 #' @param reads_track The reads track data frame from Xbiome 16S pipeline dada2
-#'                    workflow result.
+#' workflow result.
 #' @param single_end Default is FALSE. If single_end == TRUE, means the
-#'                   sequence files are single end, the x-axis will contain
-#'                   'input', 'filtered', 'dereplicated', 'nonchim'. If
-#'                   single_end == FALSE, means the sequence files are paired
-#'                   end, the x-axis will contain 'input', 'filtered',
-#'                   'denoisedF', 'denoisedR', 'merged', 'nonchim'.
+#' sequence files are single end, the x-axis will contain 'input', 'filtered',
+#' 'dereplicated', 'nonchim'. If single_end == FALSE, means the sequence files
+#' are paired end, the x-axis will contain 'input', 'filtered', 'denoisedF',
+#' 'denoisedR', 'merged', 'nonchim'.
 #' @param relative_abundance Default is FALSE. If TRUE, will turn values to
-#'                           relative abundance.
+#' relative abundance.
 #' @param legend_position Legend position. Default is top. One of "none",
-#'                        "left", "right", "bottom", "top".
+#' "left", "right", "bottom", "top".
 #' @export
 #' @examples
 #' track_reads_dada2(demo_dada2_result$reads_track, single_end = FALSE)
diff --git a/man/construct_lefse_table.Rd b/man/construct_lefse_table.Rd
index 3b2c01c..466d883 100644
--- a/man/construct_lefse_table.Rd
+++ b/man/construct_lefse_table.Rd
@@ -14,11 +14,10 @@ metadata and phylogenetic tree.}
 table, feature will be the first row.}
 
 \item{level}{The coloumn name of the level wanted to select. Default is
-"all". If "all" then retain all taxonomy level, else retain the
-taxonomy from Kingdom to selected level, drop everything else.
-Level name should be one of "all", "Kingdom", "Phylum", "Class",
-"Order", "Family", "Genus", "Species". Taxonomy will be
-seperated by "|".}
+"all". If "all" then retain all taxonomy level, else retain the taxonomy
+from Kingdom to selected level, drop everything else. Level name should be
+one of c("all", "Kingdom", "Phylum", "Class", "Order", "Family", "Genus",
+"Species"). Taxonomy will be separated by "|".}
 }
 \description{
 construct_lefse_table can construct a LEfSe-format OTU table.
diff --git a/man/construct_otu_table.Rd b/man/construct_otu_table.Rd
index e10d41c..5ba875f 100644
--- a/man/construct_otu_table.Rd
+++ b/man/construct_otu_table.Rd
@@ -11,10 +11,10 @@ construct_otu_table(phyloseq, level = "all")
 metadata and phylogenetic tree.}
 
 \item{level}{The coloumn name of the level wanted to select. Default is
-"all". If "all" then retain all taxonomy level and seperate by
-"; ", else ONLY retain the given taxonomy level, drop
-everything else. Level name should be one of "all", "Kingdom",
-"Phylum", "Class", "Order", "Family", "Genus", "Species".}
+"all". If "all" then retain all taxonomy level and separate by "; ", else
+ONLY retain the given taxonomy level, drop everything else. Level name
+should be one of "all", "Kingdom", "Phylum", "Class", "Order", "Family",
+"Genus", "Species".}
 }
 \description{
 construct_otu_table can construct a OTU table with a phyloseq object.
diff --git a/man/convert_to_percentage.Rd b/man/convert_to_percentage.Rd
index c3f46f7..13eb4c4 100644
--- a/man/convert_to_percentage.Rd
+++ b/man/convert_to_percentage.Rd
@@ -10,10 +10,9 @@ convert_to_percentage(df, row_sum = TRUE)
 \item{df}{a input data frame.}
 
 \item{row_sum}{Default is TRUE. If row_sum == TRUE, then will take every
-value of a row and divide by the summary of this row, and
-apply this to every row. If row_sum == FALSE, then will take
-every value of a column and divide by the summary of this
-column, and apply this to every column.}
+value of a row and divide by the summary of this row, and apply this to
+every row. If row_sum == FALSE, then will take every value of a column and
+divide by the summary of this column, and apply this to every column.}
 }
 \description{
 convert_to_percentage can convert a data frame to percentage.
diff --git a/man/extract_metadata_phyloseq.Rd b/man/extract_metadata_phyloseq.Rd
index 25331c9..5d3a17a 100644
--- a/man/extract_metadata_phyloseq.Rd
+++ b/man/extract_metadata_phyloseq.Rd
@@ -11,8 +11,8 @@ extract_metadata_phyloseq(phyloseq, feature = NA)
 metadata and phylogenetic tree.}
 
 \item{feature}{The column name of the feature you want to select. Default is
-NA. If NA, will return the complete metadata, else will
-return subject id and feature column that's given.}
+NA. If NA, will return the complete metadata, else will return subject id
+and feature column that's given.}
 }
 \description{
 extract_metadata_phyloseq can extract metadata from a phyloseq object.
diff --git a/man/log2fc.Rd b/man/log2fc.Rd
index a1797b5..bc6e010 100644
--- a/man/log2fc.Rd
+++ b/man/log2fc.Rd
@@ -11,12 +11,12 @@ log2fc(phyloseq, feature, level = NA, p_value = 0.05, save_res = FALSE,
 \item{phyloseq}{A phyloseq object contain otu table, taxonomy table, sample
 metadata and phylogenetic tree.}
 
-\item{feature}{The column name of the feature you want to select from metadata,
-e.g. "Phenotype".}
+\item{feature}{The column name of the feature you want to select from
+metadata, e.g. "Phenotype".}
 
-\item{level}{Which taxonomy level to calculate fold change. Default is NA. If
-level is given, will use construct_otu_table function to construct
-OTU table, and use DESeq to calculate fold change.}
+\item{level}{Which taxonomy level to calculate fold change. Default is NA.
+If level is given, will use construct_otu_table function to construct OTU
+table, and use DESeq to calculate fold change.}
 
 \item{p_value}{The cut off P value for the fold change. Default is 0.05.}
 
diff --git a/man/plot_alpha_diversity.Rd b/man/plot_alpha_diversity.Rd
index c074429..969eb3a 100644
--- a/man/plot_alpha_diversity.Rd
+++ b/man/plot_alpha_diversity.Rd
@@ -15,17 +15,16 @@ metadata and phylogenetic tree.}
 metadata.}
 
 \item{feature2}{The column name of another feature you want to select from
-metadata, e.g. "Gender", which will make the plots draw in
-different shapes. Default is NA.}
+metadata, e.g. "Gender", which will make the plots draw in different shapes.
+Default is NA.}
 
 \item{measures}{The measures to calculate alpha diversity. Default is NA. If
-NA, all available alpha diversity measures will be
-calculated and generate a table. If not NA, measures
-should be one of "Observed", "Chao1", "ACE", "Shannon",
-"Simpson", "InvSimpson", "Fisher".}
+NA, all available alpha diversity measures will be calculated and generate a
+table. If not NA, measures should be one of c("Observed", "Chao1", "ACE",
+"Shannon", "Simpson", "InvSimpson", "Fisher").}
 
 \item{p_test}{The p-value to test alpha diversity. p_test should be either
-"wilcox" or "kruskal".}
+"wilcox" or "kruskal". PS: "wilcox" can only work with two groups.}
 }
 \description{
 This is a function for plotting alpha diversity.
diff --git a/man/plot_beta_diversity.Rd b/man/plot_beta_diversity.Rd
index 1e86c58..6c38ec6 100644
--- a/man/plot_beta_diversity.Rd
+++ b/man/plot_beta_diversity.Rd
@@ -4,7 +4,8 @@
 \alias{plot_beta_diversity}
 \title{plot_beta_diversity}
 \usage{
-plot_beta_diversity(phyloseq, feature, feature2 = NA, method = "bray")
+plot_beta_diversity(phyloseq, feature, feature2 = NA, method = "bray",
+  colors = NULL)
 }
 \arguments{
 \item{phyloseq}{A phyloseq object contain otu table, taxonomy table, sample
@@ -19,6 +20,10 @@ metadata, which will show in different shape, e.g. "Gender". Default is NA.}
 \item{method}{The method to calculate beta diversity. Method should be one
 of "bray", "jaccard", "unifrac", "wunifrac". Default is "bray".
 PS: "unifrac" and "wunifrac" require a phylogenetic tree.}
+
+\item{colors}{A color vector for the plot. The number of colors needs to
+match the number of groups in feature. Default is NULL; if NULL,
+plot_beta_diversity will use ggsci::scale_color_jco for the plot.}
 }
 \description{
 This is a function for plotting beta diversity.
diff --git a/man/plot_sparsity.Rd b/man/plot_sparsity.Rd
index 64a588d..4c95f8c 100644
--- a/man/plot_sparsity.Rd
+++ b/man/plot_sparsity.Rd
@@ -7,18 +7,18 @@
 plot_sparsity(otu_table, binwidth = NA)
 }
 \arguments{
-\item{otu_table}{An OTU table in phyloseq object format. Rownames are 
+\item{otu_table}{An OTU table in phyloseq object format. Rownames are
 sample ID, colnames are taxa.}
 
-\item{binwidth}{The width of the bins. Can be specified as a numeric value, 
-or a function that calculates width from x. The default is to use bins bins 
-that cover the range of the data. You should always override this value, 
-exploring multiple widths to find the best to illustrate the stories in your 
+\item{binwidth}{The width of the bins. Can be specified as a numeric value,
+or a function that calculates width from x. The default is to use bins
+that cover the range of the data. You should always override this value,
+exploring multiple widths to find the best to illustrate the stories in your
 data.}
 }
 \description{
-plot_sparsity can show the sparsity of an OTU table. It will calculate the 
-prevalence of each OTU in all samples. X-axis is an OTU's prevalence. Y-axis 
+plot_sparsity can show the sparsity of an OTU table. It will calculate the
+prevalence of each OTU in all samples. X-axis is an OTU's prevalence. Y-axis
 is OTU count.
 }
 \examples{
diff --git a/man/track_reads_dada2.Rd b/man/track_reads_dada2.Rd
index 4a88edc..86b3031 100644
--- a/man/track_reads_dada2.Rd
+++ b/man/track_reads_dada2.Rd
@@ -12,11 +12,10 @@ track_reads_dada2(reads_track, single_end = FALSE,
 workflow result.}
 
 \item{single_end}{Default is FALSE. If single_end == TRUE, means the
-sequence files are single end, the x-axis will contain
-'input', 'filtered', 'dereplicated', 'nonchim'. If
-single_end == FALSE, means the sequence files are paired
-end, the x-axis will contain 'input', 'filtered',
-'denoisedF', 'denoisedR', 'merged', 'nonchim'.}
+sequence files are single end, the x-axis will contain 'input', 'filtered',
+'dereplicated', 'nonchim'. If single_end == FALSE, means the sequence files
+are paired end, the x-axis will contain 'input', 'filtered', 'denoisedF',
+'denoisedR', 'merged', 'nonchim'.}
 
 \item{relative_abundance}{Default is FALSE. If TRUE, will turn values to
 relative abundance.}