-
Notifications
You must be signed in to change notification settings - Fork 21
/
Copy pathgl.filter.heterozygosity.r
180 lines (159 loc) · 5.41 KB
/
gl.filter.heterozygosity.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
#' @name gl.filter.heterozygosity
#' @title Filters individuals with average heterozygosity greater than a
#' specified upper threshold or less than a specified lower threshold
#' @description
#' Calculates the observed heterozygosity for each individual in a genlight
#' object and filters individuals based on specified threshold values.
#' Use gl.report.heterozygosity to determine the appropriate thresholds.
#'
#' @param x A genlight object containing the SNP genotypes [required].
#' @param t.upper Filter individuals > the threshold [default 0.7].
#' @param t.lower Filter individuals < the threshold [default 0].
#' @param verbose Verbosity: 0, silent or fatal errors; 1, begin and end; 2,
#' progress log; 3, progress and results summary; 5, full report
#' [default 2, unless specified using gl.set.verbosity].
#' @return The filtered genlight object.
#' @export
#' @author Custodian: Luis Mijangos -- Post to
#' \url{https://groups.google.com/d/forum/dartr}
#' @importFrom plyr join
#' @family filter functions
#' @examples
#' result <- gl.filter.heterozygosity(testset.gl,t.upper=0.06,verbose=3)
#' tmp <- gl.report.heterozygosity(result,method='ind')
gl.filter.heterozygosity <- function(x,
t.upper = 0.7,
t.lower = 0,
verbose = NULL) {
# SET VERBOSITY
verbose <- gl.check.verbosity(verbose)
# FLAG SCRIPT START
funname <- match.call()[[1]]
utils.flag.start(func = funname,
build = "Jody",
verbose = verbose)
# CHECK DATATYPE
datatype <-
utils.check.datatype(x, accept = "SNP", verbose = verbose)
# SCRIPT SPECIFIC ERROR CHECKING
if (t.upper < 0 | t.upper > 1) {
stop(error(
"Fatal Error:Parameter 't.upper' must lie between 0 and 1\n"
))
}
if (t.lower < 0 | t.lower > 1) {
stop(error(
"Fatal Error:Parameter 't.upper' must lie between 0 and 1\n"
))
}
# Check for monomorphic loci
if (!x@other$loc.metrics.flags$monomorphs) {
cat(
warn(
" Warning: genlight object contains monomorphic loci which will
be factored into heterozygosity estimates\n"
)
)
}
# DO THE JOB
# Convert to matrix
m <- as.matrix(x)
# For each individual determine counts of hets, homs and NAs
c.na <- array(NA, nInd(x))
c.hets <- array(NA, nInd(x))
c.hom0 <- array(NA, nInd(x))
c.hom2 <- array(NA, nInd(x))
for (i in 1:nInd(x)) {
c.na[i] <- sum(is.na(m[i,]))
c.hets[i] <- sum(m[i,] == 1, na.rm = TRUE) / (nLoc(x) - c.na[i])
c.hom0[i] <- sum(m[i,] == 0, na.rm = TRUE) / (nLoc(x) - c.na[i])
c.hom2[i] <- sum(m[i,] == 2, na.rm = TRUE) / (nLoc(x) - c.na[i])
}
if (verbose >= 2) {
cat(
report(
" Retaining individuals with heterozygosity in the range",
t.lower,
"to",
t.upper,
"\n"
)
)
cat(paste(
" Minimum individual heterozygosity",
round(min(c.hets), 4),
"\n"
))
cat(paste(
" Maximum individual heterozygosity",
round(max(c.hets), 4),
"\n"
))
}
x.kept <- x[c.hets >= t.lower & c.hets <= t.upper,]
if (any(c.hets > t.upper)) {
x.discarded.upper <- x[c.hets > t.upper,]
}
if (any(c.hets < t.lower)) {
x.discarded.lower <- x[c.hets < t.lower,]
}
# REPORT THE RESULTS
if (verbose >= 3) {
cat(" Initial number of individuals:", nInd(x), "\n")
if (any(c.hets > t.upper)) {
cat(
" Number of outlier individuals (heterozygosity >",
t.upper,
"):",
nInd(x.discarded.upper),
"\n"
)
cat(" Deleted: ")
cat(paste0(
indNames(x.discarded.upper),
"[",
as.character(pop(x.discarded.upper)),
"],"
))
# cat('\n')
} else {
if (!(t.upper == 1)) {
cat(" Zero outlier individuals with heterozygosity >",
t.upper,
"\n")
}
}
if (any(c.hets < t.lower)) {
cat(
" Number of outlier individuals:",
nInd(x.discarded.lower),
"with heterozygosity <",
t.lower,
"\n"
)
cat(" Deleted: ")
cat(paste0(
indNames(x.discarded.lower),
"[",
as.character(pop(x.discarded.lower)),
"],"
))
cat("\n\n")
} else {
if ((!t.lower == 0)) {
cat(" No outlying individuals with heterozygosity <",
t.lower,
"\n")
}
}
cat(report(" Number of individuals retained:", nInd(x.kept), "\n"))
}
# ADD ACTION TO HISTORY
nh <- length(x.kept@other$history)
x.kept@other$history[[nh + 1]] <- match.call()
# FLAG SCRIPT END
if (verbose > 0) {
cat(report("Completed:", funname, "\n"))
}
return(x.kept)
}