-
Notifications
You must be signed in to change notification settings - Fork 21
/
Copy pathgl.filter.rdepth.r
208 lines (187 loc) · 8.2 KB
/
gl.filter.rdepth.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
#' @name gl.filter.rdepth
#' @title Filters loci based on counts of sequence tags scored at a locus (read
#' depth)
#'
#' @description
#' SNP datasets generated by DArT report AvgCountRef and AvgCountSnp as counts
#' of sequence tags for the reference and alternate alleles respectively. These
#' can be used to back calculate Read Depth. Fragment presence/absence datasets
#' as provided by DArT (SilicoDArT) provide Average Read Depth and Standard
#' Deviation of Read Depth as standard columns in their report.
#'
#' Filtering on Read Depth using the companion script gl.filter.rdepth can be on
#' the basis of loci with exceptionally low counts,
#' or loci with exceptionally high counts.
#'
#' @param x Name of the genlight object containing the SNP or tag
#' presence/absence data [required].
#' @param lower Lower threshold value below which loci will be removed
#' [default 5].
#' @param upper Upper threshold value above which loci will be removed
#' [default 50].
#' @param plot.out Specify if plot is to be produced [default TRUE].
#' @param plot_theme Theme for the plot. See Details for options
#' [default theme_dartR()].
#' @param plot_colors List of two color names for the borders and fill of the
#' plots [default two_colors].
#' @param save2tmp If TRUE, saves any ggplots and listings to the session
#' temporary directory (tempdir) [default FALSE].
#' @param verbose Verbosity: 0, silent or fatal errors; 1, begin and end; 2,
#' progress log; 3, progress and results summary; 5, full report
#' [default 2, unless specified using gl.set.verbosity].
#'
#' @details
#' For examples of themes, see:
#' \itemize{
#' \item \url{https://ggplot2.tidyverse.org/reference/ggtheme.html} and \item
#' \url{https://yutannihilation.github.io/allYourFigureAreBelongToUs/ggthemes/}
#' }
#' @return Returns a genlight object retaining loci with a Read Depth in the
#' range specified by the lower and upper threshold.
#' @author Custodian: Arthur Georges (Post to
#' \url{https://groups.google.com/d/forum/dartr})
#' @examples
#' # SNP data
#' gl.report.rdepth(testset.gl)
#' result <- gl.filter.rdepth(testset.gl, lower=8, upper=50, verbose=3)
#' # Tag P/A data
#' result <- gl.filter.rdepth(testset.gs, lower=8, upper=50, verbose=3)
#'
#' @seealso \code{\link{gl.filter.rdepth}}
#'
#' @family filters and filter reports
#' @import patchwork
#' @export
gl.filter.rdepth <- function(x,
lower = 5,
upper = 50,
plot.out = TRUE,
plot_theme = theme_dartR(),
plot_colors = two_colors,
save2tmp = FALSE,
verbose = NULL) {
# SET VERBOSITY
verbose <- gl.check.verbosity(verbose)
# FLAG SCRIPT START
funname <- match.call()[[1]]
utils.flag.start(func = funname,
build = "Jody",
verbosity = verbose)
# CHECK DATATYPE
datatype <- utils.check.datatype(x, verbose = verbose)
# FUNCTION SPECIFIC ERROR CHECKING
# DO THE JOB
n0 <- nLoc(x)
if (datatype == "SilicoDArT") {
rdepth <- x@other$loc.metrics$AvgReadDepth
} else if (datatype == "SNP") {
rdepth <- x@other$loc.metrics$rdepth
}
# Remove SNP loci with rdepth < threshold
if (verbose >= 2) {
cat(report(
" Removing loci with rdepth <=",
lower,
"and >=",
upper,
"\n"
))
}
index <- (rdepth >= lower & rdepth <= upper)
x2 <- x[, index]
# Remove the corresponding records from the loci metadata
x2@other$loc.metrics <- x@other$loc.metrics[index,]
# PLOT HISTOGRAMS, BEFORE AFTER
if (plot.out) {
plotvar <- rdepth
# min <- min(plotvar,lower) min <- trunc(min*100)/100
max <- max(plotvar, upper, na.rm = TRUE)
max <- ceiling(max / 10) * 10
if (datatype == "SNP") {
xlabel <- "Pre-filter SNP read depth"
} else {
xlabel <- "Pre-filter P/A read depth"
}
p1 <-
ggplot(data.frame(plotvar), aes(x = plotvar)) + geom_histogram(bins = 100,
color = plot_colors[1],
fill = plot_colors[2]) + coord_cartesian(xlim = c(0, max)) + geom_vline(xintercept = lower,
color = "red",
size = 1) + geom_vline(xintercept = upper,
color = "red",
size = 1) + xlab(xlabel) +
ylab("Count") + plot_theme
if (datatype == "SilicoDArT") {
rdepth <- x2@other$loc.metrics$AvgReadDepth
} else if (datatype == "SNP") {
rdepth <- x2@other$loc.metrics$rdepth
}
plotvar <- rdepth
# min <- min(plotvar,lower) min <- trunc(min*100)/100
max <- max(plotvar, upper, na.rm = TRUE)
max <- ceiling(max / 10) * 10
if (datatype == "SNP") {
xlabel <- "Post-filter SNP read depth"
} else {
xlabel <- "Post-filter P/A read depth"
}
p2 <-
ggplot(data.frame(plotvar), aes(x = plotvar)) +
geom_histogram(bins = 100,
color = plot_colors[1],
fill = plot_colors[2]) + coord_cartesian(xlim = c(0, max)) + geom_vline(xintercept = lower,
color = "red",
size = 1) + geom_vline(xintercept = upper,
color = "red",
size = 1) + xlab(xlabel) +
ylab("Count") + plot_theme
p3 <- (p1 / p2) + plot_layout(heights = c(1, 1))
print(p3)
}
# REPORT A SUMMARY
if (verbose >= 3) {
cat(" Summary of filtered dataset\n")
cat(" Initial no. of loci =", n0, "\n")
# cat(paste(' read depth >=',lower,'and read depth <=',upper,'\n'))
cat(" No. of loci deleted =", (n0 - nLoc(x2)), "\n")
cat(paste(" No. of loci retained:", nLoc(x2), "\n"))
cat(paste(" No. of individuals:", nInd(x2), "\n"))
cat(paste(" No. of populations: ", length(levels(
factor(pop(x2))
)), "\n"))
}
# SAVE INTERMEDIATES TO TEMPDIR
if (save2tmp & plot.out) {
temp_plot <-
tempfile(pattern = paste0(
"Plot",
paste0(
names(match.call()),
"_",
as.character(match.call()),
collapse = "_"
),
"_"
))
# saving to tempdir
saveRDS(p3, file = temp_plot)
if (verbose >= 2) {
cat(report(
" Saving the plot in ggplot format to the session tempfile\n"
))
cat(
report(
" NOTE: Retrieve output files from tempdir using gl.list.reports() and gl.print.reports()\n"
)
)
}
}
# ADD TO HISTORY
nh <- length(x2@other$history)
x2@other$history[[nh + 1]] <- match.call()
# FLAG SCRIPT END
if (verbose > 0) {
cat(report("Completed:", funname, "\n"))
}
return(x2)
}