-
Notifications
You must be signed in to change notification settings - Fork 1
/
qiime2-phyloseq.R
103 lines (92 loc) · 3.25 KB
/
qiime2-phyloseq.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#### import and analysis of qiime2 results using phyloseq ####
# follows tutorial available at http://joey711.github.io/phyloseq-demo/phyloseq-demo.html
# install bioconductor package
#source("http://bioconductor.org/biocLite.R")
#biocLite('phyloseq')
# load package
library(phyloseq)
# install ggplot2
#install.packages("ggplot2")
# load package
library(ggplot2)
#### data import ####
# import biom (otu table and tax table)
biom <- import_biom("qiime2-exported/table.biom", parseFunction = parse_taxonomy_greengenes)
# you will receive warning messages from the command above
# the code below corrects this issue with taxonomic ranks
# view names
rank_names(biom)
# correct taxonomy
tax_table(biom) <- tax_table(biom)[, 1:7]
rank_names(biom)
biom
# import map (metadata) file
map <- import_qiime_sample_data("qiime2-exported/sample-metadata.tsv")
str(map)
# import tree file
rep_tre <- read_tree("qiime2-exported/tree.nwk")
rep_tre
# combine otu/tax and map
comb <- merge_phyloseq(biom, map, rep_tre)
str(comb)
comb
#### inspect combined otu/taxonomy/map data ####
## samples
# show sample names
sample_names(comb)
# count number of samples
nsamples(comb)
# show variables (metadata) for samples
sample_variables(comb)
## taxa
# show unique phyla in dataset
get_taxa_unique(comb, "Phylum")
# show total counts for each taxon in dataset
taxa_sums(comb)
# show only top 10 most frequent taxa
taxa_sums(comb)[1:10]
# examine total counts for each sample
sample_sums(comb)
# rarefy samples to even depth
comb.rar <- rarefy_even_depth(comb)
# this object can be used in place of comb anywhere below
#### summary plots and statistical testing
## richness
# plot richness calculated across several metrics
plot_richness(comb)
# plot richness by body site
plot_richness(comb, x = "BodySite")
# plot richness by subject
plot_richness(comb, x = "Subject")
# plot richness by reported antibiotic usage
(p <- plot_richness(comb, x = "ReportedAntibioticUsage"))
p + geom_boxplot(data = p$data,
aes(x = ReportedAntibioticUsage), alpha = 0.1)
# the plot above displays both the boxplots and all data points
## phylogenetic tree
# extract only data from two families
comb.sub <- subset_taxa(comb, Family == "Bacteroidaceae" | Family == "[Paraprevotellaceae]")
# plot tree
plot_tree(comb.sub, color = "ReportedAntibioticUsage", # color by antibiotic usage
shape = "Genus", label.tips = "Species", # specify shape and labels for tips
size = "abundance", plot.margin = 0.5, # size points by abundance
ladderize = TRUE) # order appearance of tips
## abundance bar plots
# identify 10 most abundant taxa
top.taxa <- names(sort(taxa_sums(comb), TRUE)[1:10])
# extract most abundant taxa
comb.top <- prune_taxa(top.taxa, comb)
# plot abundance
plot_bar(comb.top, "BodySite", fill = "ReportedAntibioticUsage", facet_grid = ~Genus)
## ordination plot
# NMDS plot
plot_ordination(comb, ordinate(comb, "MDS"), color = "BodySite") +
geom_point(size = 3)
## network plot
# calculate distance
comb.net <- make_network(comb, type = "samples", distance = "bray", max.dist = 0.85)
# plot network
plot_network(comb.net, comb, color = "BodySite", shape = "ReportedAntibioticUsage",
line_weight = 0.4, label = NULL)
## heatmap
plot_heatmap(comb.top, "NMDS", "bray", "BodySite", "Family")