-
Notifications
You must be signed in to change notification settings - Fork 21
/
Copy pathgl.filter.heterozygosity.r
119 lines (95 loc) · 4.3 KB
/
gl.filter.heterozygosity.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#' @name gl.filter.heterozygosity
#' @title Filters individuals with average heterozygosity greater than a specified upper threshold or less than a specified lower threshold.
#' @description
#' Calculates the observed heterozyosity for each individual in a genlight object and filters individuals based on specified threshold values.
#' Use gl.report.heterozygosity to determine the appropriate thresholds.
#'
#' @param x -- a genlight object containing the SNP genotypes [Required]
#' @param t.upper -- filter individuals > the threshold [default 0.7]
#' @param t.lower -- filter individuals < the threshold [default 0]
#' @param verbose -- verbosity: 0, silent or fatal errors; 1, begin and end; 2, progress log ; 3, progress and results summary; 5, full report [default 2, unless specified using gl.set.verbosity]
#' @return the filtered genlight object
#' @export
#' @author Custodian: Luis Mijangos -- Post to \url{https://groups.google.com/d/forum/dartr}
#' @importFrom plyr join
#' @examples
#' result <- gl.filter.heterozygosity(testset.gl,t.upper=0.06,verbose=3)
#' tmp <- gl.report.heterozygosity(result,method="ind")
gl.filter.heterozygosity <- function(x,
t.upper=0.7,
t.lower=0,
verbose=NULL) {
# SET VERBOSITY
verbose <- gl.check.verbosity(verbose)
# FLAG SCRIPT START
funname <- match.call()[[1]]
utils.flag.start(func=funname,build="Jackson",v=verbose)
# CHECK DATATYPE
datatype <- utils.check.datatype(x,accept="SNP",verbose=verbose)
# SCRIPT SPECIFIC ERROR CHECKING
if (t.upper < 0 | t.upper > 1){
stop(error("Fatal Error:Parameter 't.upper' must lie between 0 and 1\n"))
}
if (t.lower < 0 | t.lower > 1){
stop(error("Fatal Error:Parameter 't.upper' must lie between 0 and 1\n"))
}
# Check for monomorphic loci
if (!x@other$loc.metrics.flags$monomorphs) {
cat(warn(" Warning: genlight object contains monomorphic loci which will be factored into heterozygosity estimates\n"))
}
# DO THE JOB
# Convert to matrix
m <- as.matrix(x)
# For each individual determine counts of hets, homs and NAs
c.na <- array(NA, nInd(x))
c.hets <- array(NA, nInd(x))
c.hom0 <- array(NA, nInd(x))
c.hom2 <- array(NA, nInd(x))
for (i in 1:nInd(x)) {
c.na[i] <- sum(is.na(m[i, ]))
c.hets[i] <- sum(m[i, ] == 1, na.rm = TRUE)/(nLoc(x) - c.na[i])
c.hom0[i] <- sum(m[i, ] == 0, na.rm = TRUE)/(nLoc(x) - c.na[i])
c.hom2[i] <- sum(m[i, ] == 2, na.rm = TRUE)/(nLoc(x) - c.na[i])
}
if (verbose >=2 ) {
cat(report(" Retaining individuals with heterozygosity in the range",t.lower,"to",t.upper,"\n"))
cat(paste(" Minimum individual heterozygosity",round(min(c.hets),4),"\n"))
cat(paste(" Maximum individual heterozygosity",round(max(c.hets),4),"\n"))
}
x.kept <- x[c.hets >= t.lower & c.hets <= t.upper,]
if (any(c.hets > t.upper)) {
x.discarded.upper <- x[c.hets > t.upper,]
}
if (any(c.hets < t.lower)) {
x.discarded.lower <- x[c.hets < t.lower,]
}
# REPORT THE RESULTS
if(verbose >= 3){
cat(" Initial number of individuals:",nInd(x),"\n")
if (any(c.hets > t.upper)) {
cat(" Number of outlier individuals (heterozygosity >",t.upper,"):",nInd(x.discarded.upper),"\n")
cat(" Deleted: ")
cat(paste0(indNames(x.discarded.upper),"[",as.character(pop(x.discarded.upper)),"],"))
#cat("\n")
} else {
if (!(t.upper == 1)) {cat(" Zero outlier individuals with heterozygosity >",t.upper, "\n")}
}
if (any(c.hets < t.lower)) {
cat(" Number of outlier individuals:",nInd(x.discarded.lower),"with heterozygosity <",t.lower, "\n")
cat(" Deleted: ")
cat(paste0(indNames(x.discarded.lower),"[",as.character(pop(x.discarded.lower)),"],"))
cat("\n\n")
} else {
if ((!t.lower == 0)) {cat(" No outlying individuals with heterozygosity <",t.lower, "\n")}
}
cat(report(" Number of individuals retained:",nInd(x.kept),"\n"))
}
# ADD ACTION TO HISTORY
nh <- length(x.kept@other$history)
x.kept@other$history[[nh + 1]] <- match.call()
# FLAG SCRIPT END
if (verbose > 0) {
cat(report("\nCompleted:",funname,"\n"))
}
return(x.kept)
}