@@ -106,28 +106,49 @@ smp.names <- sample.info$Sample;
106
106
107
107
# read in genotype concordance file
108
108
# account for differences in header depending on version of bcftools used
109
- concordance.data <- read.delim(cor.file , skip = 19 );
109
+ concordance.data <- if (is.dna ) {
110
+ read.delim(cor.file , skip = 19 );
111
+ } else {
112
+ read.delim(cor.file , row.names = 1 );
113
+ }
114
+
110
115
if (ncol(concordance.data ) == 1 ) {
111
116
concordance.data <- read.delim(cor.file , skip = 32 );
112
117
}
113
118
114
- colnames(concordance.data ) <- c(' Metric' ,' Query.Sample' ,' Genotyped.Sample' ,' Discordance' ,' pvalue' ,' Total.Sites' ,' N.matched'
115
- )[1 : ncol(concordance.data )];
119
+ if (is.dna ) {
120
+ colnames(concordance.data ) <- c(' Metric' ,' Query.Sample' ,' Genotyped.Sample' ,' Discordance' ,
121
+ ' pvalue' ,' Total.Sites' ,' N.matched' )[1 : ncol(concordance.data )];
122
+
123
+ # convert N mismatches (discordance) to a proportion of total sites
124
+ concordance.data $ Proportion <- 1 - (concordance.data $ Discordance / concordance.data $ Total.Sites );
125
+
126
+ # reshape concordance data
127
+ cor.data <- reshape(
128
+ concordance.data [,c(' Query.Sample' ,' Genotyped.Sample' ,' Proportion' )],
129
+ direction = ' wide' ,
130
+ idvar = ' Genotyped.Sample' ,
131
+ timevar = ' Query.Sample'
132
+ );
116
133
117
- # convert N mismatches (discordance) to a proportion of total sites
118
- concordance.data $ Proportion <- 1 - (concordance.data $ Discordance / concordance.data $ Total.Sites );
134
+ rownames(cor.data ) <- cor.data [,1 ];
135
+ colnames(cor.data ) <- gsub(' Proportion\\ .' ,' ' ,colnames(cor.data ));
136
+ cor.data <- cor.data [,- 1 ];
119
137
120
- # reshape concordance data
121
- cor.data <- reshape(
122
- concordance.data [,c(' Query.Sample' ,' Genotyped.Sample' ,' Proportion' )],
123
- direction = ' wide' ,
124
- idvar = ' Genotyped.Sample' ,
125
- timevar = ' Query.Sample'
126
- );
138
+ # current data is one-sided, so mirror this
139
+ for (i in 1 : nrow(concordance.data )) {
140
+ query.smp <- concordance.data [i ,]$ Query.Sample ;
141
+ genotyped.smp <- concordance.data [i ,]$ Genotyped.Sample ;
142
+ cor.data [query.smp ,genotyped.smp ] <- cor.data [genotyped.smp ,query.smp ];
143
+ }
144
+
145
+ for (smp in smp.names ) { cor.data [smp ,smp ] <- 1 ; }
127
146
128
- rownames(cor.data ) <- cor.data [,1 ];
129
- colnames(cor.data ) <- gsub(' Proportion\\ .' ,' ' ,colnames(cor.data ));
130
- cor.data <- cor.data [,- 1 ];
147
+ } else {
148
+ cor.data <- concordance.data ;
149
+ rownames(cor.data ) <- gsub(' \\ .' ,' -' ,rownames(cor.data ));
150
+ colnames(cor.data ) <- gsub(' \\ .' ,' -' ,colnames(cor.data ));
151
+ }
131
152
132
153
# add in missing samples
133
154
missing.samples <- setdiff(smp.names , rownames(cor.data ));
@@ -136,14 +157,6 @@ cor.data[missing.samples,] <- NA;
136
157
missing.samples <- setdiff(smp.names , colnames(cor.data ));
137
158
cor.data [,missing.samples ] <- NA ;
138
159
139
- # current data is one-sided, so mirror this
140
- for (i in 1 : nrow(concordance.data )) {
141
- query.smp <- concordance.data [i ,]$ Query.Sample ;
142
- genotyped.smp <- concordance.data [i ,]$ Genotyped.Sample ;
143
- cor.data [query.smp ,genotyped.smp ] <- cor.data [genotyped.smp ,query.smp ];
144
- }
145
-
146
- for (smp in smp.names ) { cor.data [smp ,smp ] <- 1 ; }
147
160
148
161
# read in coverage metrics
149
162
metric.data <- read.delim(summary.file , row.names = 1 );
0 commit comments