Skip to content

Commit

Permalink
pairedDerep returns empty data.frame if no consistent pairs in dada (b…
Browse files Browse the repository at this point in the history
…enjjneb#485)

* pairedDerep returns empty data.frame if no consistent pairs in dada

* Returning from nested function within lapply

* Remove debugging output statement

* Added message for (verbose=TRUE) and no pairings
  • Loading branch information
derele authored and benjjneb committed Jun 15, 2018
1 parent 2df8c69 commit 7d8d2df
Showing 1 changed file with 69 additions and 59 deletions.
128 changes: 69 additions & 59 deletions R/paired.R
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,7 @@ mergePairs <- function(dadaF, derepF, dadaR, derepR, minOverlap = 12, maxMismatc
nrecs <- c(length(dadaF), length(derepF), length(dadaR), length(derepR))
if(length(unique(nrecs))>1) stop("The dadaF/derepF/dadaR/derepR arguments must be the same length.")

rval <- list()
for(i in seq_along(dadaF)) {
rval <- lapply(seq_along(dadaF), function (i) {
mapF <- derepF[[i]]$map
mapR <- derepR[[i]]$map
if(!(is.integer(mapF) && is.integer(mapR))) stop("Incorrect format of $map in derep-class arguments.")
Expand All @@ -110,69 +109,80 @@ mergePairs <- function(dadaF, derepF, dadaR, derepR, minOverlap = 12, maxMismatc
pairdf <- data.frame(sequence = "", abundance=0, forward=rF, reverse=rR)
ups <- unique(pairdf) # The unique forward/reverse pairs of denoised sequences
keep <- !is.na(ups$forward) & !is.na(ups$reverse)
ups <- ups[!is.na(ups$forward) & !is.na(ups$reverse),] # Drop pairs with uncorrected uniques
Funqseq <- unname(as.character(dadaF[[i]]$clustering$sequence[ups$forward]))
Runqseq <- rc(unname(as.character(dadaR[[i]]$clustering$sequence[ups$reverse])))

if (justConcatenate == TRUE) {
# Simply concatenate the sequences together
ups$sequence <- mapply(function(x,y) paste0(x,"NNNNNNNNNN", y), Funqseq, Runqseq, SIMPLIFY=FALSE);
ups$nmatch <- 0
ups$nmismatch <- 0
ups$nindel <- 0
ups$prefer <- NA
ups$accept <- TRUE
ups <- ups[keep, ]
if (nrow(ups)==0) {
outnames <- c("sequence", "abundance", "forward", "reverse",
"nmatch", "nmismatch", "nindel", "prefer", "accept")
ups <- data.frame(matrix(ncol = length(outnames), nrow = 0))
names(ups) <- outnames
if(verbose) {
message("No paired-reads (in ZERO unique pairings) successfully merged out of ", nrow(pairdf), " pairings) input.")
}
return(ups)
} else {
# Align forward and reverse reads.
# Use unbanded N-W align to compare forward/reverse
# Adjusting align params to prioritize zero-mismatch merges
tmp <- getDadaOpt(c("MATCH", "MISMATCH", "GAP_PENALTY"))
if(maxMismatch==0) {
setDadaOpt(MATCH=1L, MISMATCH=-64L, GAP_PENALTY=-64L)
} else {
setDadaOpt(MATCH=1L, MISMATCH=-8L, GAP_PENALTY=-8L)
}
alvecs <- mapply(function(x,y) nwalign(x,y,band=-1,...), Funqseq, Runqseq, SIMPLIFY=FALSE)
setDadaOpt(tmp)
outs <- t(sapply(alvecs, function(x) C_eval_pair(x[1], x[2])))
ups$nmatch <- outs[,1]
ups$nmismatch <- outs[,2]
ups$nindel <- outs[,3]
ups$prefer <- 1 + (dadaR[[i]]$clustering$n0[ups$reverse] > dadaF[[i]]$clustering$n0[ups$forward])
ups$accept <- (ups$nmatch >= minOverlap) & ((ups$nmismatch + ups$nindel) <= maxMismatch)
# Make the sequence
ups$sequence <- mapply(C_pair_consensus, sapply(alvecs,`[`,1), sapply(alvecs,`[`,2), ups$prefer, trimOverhang);
# Additional param to indicate whether 1:forward or 2:reverse takes precedence
# Must also strip out any indels in the return
# This function is only used here.
Funqseq <- unname(as.character(dadaF[[i]]$clustering$sequence[ups$forward]))
Runqseq <- rc(unname(as.character(dadaR[[i]]$clustering$sequence[ups$reverse])))
if (justConcatenate == TRUE) {
# Simply concatenate the sequences together
ups$sequence <- mapply(function(x,y) paste0(x,"NNNNNNNNNN", y),
Funqseq, Runqseq, SIMPLIFY=FALSE);
ups$nmatch <- 0
ups$nmismatch <- 0
ups$nindel <- 0
ups$prefer <- NA
ups$accept <- TRUE
} else {
# Align forward and reverse reads.
# Use unbanded N-W align to compare forward/reverse
# Adjusting align params to prioritize zero-mismatch merges
tmp <- getDadaOpt(c("MATCH", "MISMATCH", "GAP_PENALTY"))
if(maxMismatch==0) {
setDadaOpt(MATCH=1L, MISMATCH=-64L, GAP_PENALTY=-64L)
} else {
setDadaOpt(MATCH=1L, MISMATCH=-8L, GAP_PENALTY=-8L)
}
alvecs <- mapply(function(x,y) nwalign(x,y,band=-1,...), Funqseq, Runqseq, SIMPLIFY=FALSE)
setDadaOpt(tmp)
outs <- t(sapply(alvecs, function(x) C_eval_pair(x[1], x[2])))
ups$nmatch <- outs[,1]
ups$nmismatch <- outs[,2]
ups$nindel <- outs[,3]
ups$prefer <- 1 + (dadaR[[i]]$clustering$n0[ups$reverse] > dadaF[[i]]$clustering$n0[ups$forward])
ups$accept <- (ups$nmatch >= minOverlap) & ((ups$nmismatch + ups$nindel) <= maxMismatch)
# Make the sequence
ups$sequence <- mapply(C_pair_consensus, sapply(alvecs,`[`,1), sapply(alvecs,`[`,2), ups$prefer, trimOverhang);
# Additional param to indicate whether 1:forward or 2:reverse takes precedence
# Must also strip out any indels in the return
# This function is only used here.
}

# Add abundance and sequence to the output data.frame
tab <- table(pairdf$forward, pairdf$reverse)
ups$abundance <- tab[cbind(ups$forward, ups$reverse)]
ups$sequence[!ups$accept] <- ""
# Add columns from forward/reverse clustering
propagateCol <- propagateCol[propagateCol %in% colnames(dadaF[[i]]$clustering)]
for(col in propagateCol) {
ups[,paste0("F.",col)] <- dadaF[[i]]$clustering[ups$forward,col]
ups[,paste0("R.",col)] <- dadaR[[i]]$clustering[ups$reverse,col]
}
# Sort output by abundance and name
ups <- ups[order(ups$abundance, decreasing=TRUE),]
rownames(ups) <- NULL
if(verbose) {
message(sum(ups$abundance[ups$accept]), " paired-reads (in ", sum(ups$accept), " unique pairings) successfully merged out of ", sum(ups$abundance), " (in ", nrow(ups), " pairings) input.")
}
if(!returnRejects) { ups <- ups[ups$accept,] }
# Add abundance and sequence to the output data.frame
tab <- table(pairdf$forward, pairdf$reverse)
ups$abundance <- tab[cbind(ups$forward, ups$reverse)]
ups$sequence[!ups$accept] <- ""
# Add columns from forward/reverse clustering
propagateCol <- propagateCol[propagateCol %in% colnames(dadaF[[i]]$clustering)]
for(col in propagateCol) {
ups[,paste0("F.",col)] <- dadaF[[i]]$clustering[ups$forward,col]
ups[,paste0("R.",col)] <- dadaR[[i]]$clustering[ups$reverse,col]
}
# Sort output by abundance and name
ups <- ups[order(ups$abundance, decreasing=TRUE),]
rownames(ups) <- NULL
if(verbose) {
message(sum(ups$abundance[ups$accept]), " paired-reads (in ", sum(ups$accept), " unique pairings) successfully merged out of ", sum(ups$abundance), " (in ", nrow(ups), " pairings) input.")
}
if(!returnRejects) { ups <- ups[ups$accept,] }

if(any(duplicated(ups$sequence))) {
message("Duplicate sequences in merged output.")
if(any(duplicated(ups$sequence))) {
message("Duplicate sequences in merged output.")
}
return(ups)
}
rval[[i]] <- ups
}
})
if(length(rval) == 1) rval <- rval[[1]]
else if(!is.null(names(dadaF))) names(rval) <- names(dadaF)
if(!is.null(names(dadaF))) names(rval) <- names(dadaF)

return(rval)
}

Expand Down

0 comments on commit 7d8d2df

Please sign in to comment.