Skip to content

Commit 0d60209

Browse files
author
sprokopec
committed
incorporated new genotype concordance method
1 parent 4165949 commit 0d60209

File tree

1 file changed

+36
-23
lines changed

1 file changed

+36
-23
lines changed

scripts/report/plot_qc_metrics.R

Lines changed: 36 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -106,28 +106,49 @@ smp.names <- sample.info$Sample;
106106

107107
# read in genotype concordance file
108108
# account for differences in header depending on version of bcftools used
109-
concordance.data <- read.delim(cor.file, skip = 19);
109+
concordance.data <- if (is.dna) {
110+
read.delim(cor.file, skip = 19);
111+
} else {
112+
read.delim(cor.file, row.names = 1);
113+
}
114+
110115
if (ncol(concordance.data) == 1) {
111116
concordance.data <- read.delim(cor.file, skip = 32);
112117
}
113118

114-
colnames(concordance.data) <- c('Metric','Query.Sample','Genotyped.Sample','Discordance','pvalue','Total.Sites','N.matched'
115-
)[1:ncol(concordance.data)];
119+
if (is.dna) {
120+
colnames(concordance.data) <- c('Metric','Query.Sample','Genotyped.Sample','Discordance',
121+
'pvalue','Total.Sites','N.matched')[1:ncol(concordance.data)];
122+
123+
# convert N mismatches (discordance) to a proportion of total sites
124+
concordance.data$Proportion <- 1 - (concordance.data$Discordance / concordance.data$Total.Sites);
125+
126+
# reshape concordance data
127+
cor.data <- reshape(
128+
concordance.data[,c('Query.Sample','Genotyped.Sample','Proportion')],
129+
direction = 'wide',
130+
idvar = 'Genotyped.Sample',
131+
timevar = 'Query.Sample'
132+
);
116133

117-
# convert N mismatches (discordance) to a proportion of total sites
118-
concordance.data$Proportion <- 1 - (concordance.data$Discordance / concordance.data$Total.Sites);
134+
rownames(cor.data) <- cor.data[,1];
135+
colnames(cor.data) <- gsub('Proportion\\.','',colnames(cor.data));
136+
cor.data <- cor.data[,-1];
119137

120-
# reshape concordance data
121-
cor.data <- reshape(
122-
concordance.data[,c('Query.Sample','Genotyped.Sample','Proportion')],
123-
direction = 'wide',
124-
idvar = 'Genotyped.Sample',
125-
timevar = 'Query.Sample'
126-
);
138+
# current data is one-sided, so mirror this
139+
for (i in 1:nrow(concordance.data)) {
140+
query.smp <- concordance.data[i,]$Query.Sample;
141+
genotyped.smp <- concordance.data[i,]$Genotyped.Sample;
142+
cor.data[query.smp,genotyped.smp] <- cor.data[genotyped.smp,query.smp];
143+
}
144+
145+
for (smp in smp.names) { cor.data[smp,smp] <- 1; }
127146

128-
rownames(cor.data) <- cor.data[,1];
129-
colnames(cor.data) <- gsub('Proportion\\.','',colnames(cor.data));
130-
cor.data <- cor.data[,-1];
147+
} else {
148+
cor.data <- concordance.data;
149+
rownames(cor.data) <- gsub('\\.','-',rownames(cor.data));
150+
colnames(cor.data) <- gsub('\\.','-',colnames(cor.data));
151+
}
131152

132153
# add in missing samples
133154
missing.samples <- setdiff(smp.names, rownames(cor.data));
@@ -136,14 +157,6 @@ cor.data[missing.samples,] <- NA;
136157
missing.samples <- setdiff(smp.names, colnames(cor.data));
137158
cor.data[,missing.samples] <- NA;
138159

139-
# current data is one-sided, so mirror this
140-
for (i in 1:nrow(concordance.data)) {
141-
query.smp <- concordance.data[i,]$Query.Sample;
142-
genotyped.smp <- concordance.data[i,]$Genotyped.Sample;
143-
cor.data[query.smp,genotyped.smp] <- cor.data[genotyped.smp,query.smp];
144-
}
145-
146-
for (smp in smp.names) { cor.data[smp,smp] <- 1; }
147160

148161
# read in coverage metrics
149162
metric.data <- read.delim(summary.file, row.names = 1);

0 commit comments

Comments
 (0)