Skip to content

Commit

Permalink
Save aligned CIGAR to .cns output files for later dumping to .bam fil…
Browse files Browse the repository at this point in the history
…es; options cnsAlign and bamOutput control this. Bump on-disk tig format version to 2.
  • Loading branch information
brianwalenz committed Jul 17, 2024
1 parent f8215b3 commit b871d94
Show file tree
Hide file tree
Showing 9 changed files with 731 additions and 535 deletions.
47 changes: 23 additions & 24 deletions src/pipelines/canu/Consensus.pm
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ sub utgcns ($$) {
print F " -T ../$asm.ctgStore 1 \\\n";
print F " -P \$jobid \\\n";
print F " -O ./ctgcns/\$jobid.cns.WORKING \\\n";
print F " -B ./ctgcns/\$jobid.bams/ \\\n" if (getGlobal("bamOutput") eq "1");
print F " -norealign \\\n" if ((getGlobal("cnsAlign") eq "0") &&
(getGlobal("bamOutput") eq "0"));
print F " -maxcoverage " . getGlobal('cnsMaxCoverage') . " \\\n";
print F " -e " . getGlobal("cnsErrorRate") . " \\\n";
print F " -em " . getGlobal("cnsErrorRate") . " \\\n";
Expand Down Expand Up @@ -477,39 +478,37 @@ sub consensusLoad ($) {
$cmd .= "> ./5-consensus/ctgcns.files.ctgStoreLoad.err 2>&1";

if (runCommand("unitigging", $cmd)) {
caExit("failed to load unitig consensus into ctgStore", "$path/ctgcns.files.ctgStoreLoad.err");
caExit("failed to load unitig consensus into ctgStore", "$path/ctgcns.files.ctgStoreLoad.err");
}
unlink "$path/ctgcns.files.ctgStoreLoad.err";

stashFile("unitigging/$asm.ctgStore/seqDB.v002.dat");
stashFile("unitigging/$asm.ctgStore/seqDB.v002.tig");
}

# Consolidate BAM outputs.
# And extract bam records.

if (getGlobal("bamOutput") eq "1") {
my $samtools = getGlobal("samtools");
if ((getGlobal("bamOutput") eq "1") &&
(! fileExists("$asm.contigs.bam"))) {
my $samtools = getGlobal("samtools");

open(F, "find $path/ctgcns -type f -print |");
open(O, "> $path/ctgcns.bam.list") or caExit("failed to open '$path/ctgcns.bam.list' for writing: $!\n", undef);
while (<F>) {
if (m!ctgcns/\d+.bams/\d+.bam!) {
print O $_;
$cmd = "$bin/tgTigDisplay \\\n";
$cmd .= " -S ../$asm.seqStore \\\n";
$cmd .= " -b \\\n";
$cmd .= " -o ../$asm.contigs.unsorted.bam \\\n";
$cmd .= " -L ./5-consensus/ctgcns.files \\\n";
$cmd .= "> ./5-consensus/ctgcns.files.contigs.bam.err 2>&1";

if (runCommand("unitigging", $cmd)) {
caExit("failed to extract BAM records from ctgcns files", "$path/ctgcns.files.contigs.bam.err");
}
unlink "$path/ctgcns.files.contigs.bam.err";

if (runCommand(".", "$samtools sort --write-index --threads 2 -o ./$asm.contigs.bam ./$asm.contigs.unsorted.bam > ./ctgcns.bam.err 2>&1")) {
caExit("failed to sort BAM outputs from ./$asm.contigs.unsorted.bam to ./$asm.contigs.bam", "./ctgcns.bam.err");
}
}
close(O);
close(F);

if (runCommand(".", "$samtools merge -o ./$asm.contigs.unsorted.bam -b $path/ctgcns.bam.list > $path/ctgcns.bam.err 2>&1")) {
caExit("failed to merge BAM outputs into ./$asm.contigs.unsorted.bam", "$path/ctgcns.bam.err");
}
unlink "$path/ctgcns.bam.err";

if (runCommand(".", "$samtools sort --write-index --threads 2 -o ./$asm.contigs.bam ./$asm.contigs.unsorted.bam > $path/ctgcns.bam.err 2>&1")) {
caExit("failed to sort BAM outputs from ./$asm.contigs.unsorted.bam to ./$asm.contigs.bam", "$path/ctgcns.bam.err");
}
unlink "./$asm.contigs.unsorted.bam";
unlink "$path/ctgcns.bam.err";
unlink "./$asm.contigs.unsorted.bam";
unlink "./ctgcns.bam.err";
}

# Remove consensus outputs
Expand Down
3 changes: 2 additions & 1 deletion src/pipelines/canu/Defaults.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1006,7 +1006,8 @@ sub setDefaults () {
setDefault("cnsConsensus", "pbdagcon", "Which consensus algorithm to use; 'pbdagcon' (fast, reliable); 'utgcns' (multialignment output); 'quick' (single read mosaic); default 'pbdagcon'");
setDefault("cnsPartitions", 0, "Attempt to create this many consensus jobs; default '0' = based on the largest tig");

setDefault("bamOutput", 1, "Output contigs as BAM");
setDefault("cnsAlign", 1, "Realign reads to contigs after consensus");
setDefault("bamOutput", 1, "Output contigs as BAM (enables cnsAlign)");
#etDefault("fastaOutput", 1, "Output contigs as FASTA");

##### Correction Options
Expand Down
37 changes: 20 additions & 17 deletions src/stores/tgStore.C
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#include "tgStore.H"

uint32 MASRmagic = 0x5253414d; // 'MASR', as a big endian integer
uint32 MASRversion = 1;
uint32 MASRversion = 2;

#define MAX_VERS 1024 // Linked to 10 bits in the header file.

Expand Down Expand Up @@ -125,14 +125,6 @@ tgStore::tgStore(const char *path_,
assert(0);
break;
}


// Fail (again?) if there are no tigs loaded.

//if (_tigLen == 0) {
// fprintf(stderr, "tgStore::tgStore()-- ERROR, didn't find any tigs in the store. Correct version?\n");
// exit(1);
//}
}


Expand Down Expand Up @@ -643,18 +635,29 @@ tgStore::loadMASR(tgStoreEntry* &R, uint32& L, uint32& M, uint32 V) {
exit(1);
}

if (MASRversionInFile != MASRversion) {
fprintf(stderr, "tgStore::loadMASR()-- Failed to open '%s': version number mismatch; file=%d code=%d\n",
_name, MASRversionInFile, MASRversion);
exit(1);
}

// Check we're consistent.
if (L < MASRtotalInFile)
fprintf(stderr, "tgStore::loadMASR()-- '%s' has more tigs (" F_U32 ") than expected (" F_U32 ").\n",
_name, MASRtotalInFile, L), exit(1);

loadFromFile(R, "MASR", masrLen, F);
if (MASRversionInFile == 1) {
tgStoreEntry *Rv1 = new tgStoreEntry [M];
loadFromFile(Rv1, "MASR", masrLen, F);

for (uint32 ii=0; ii<masrLen; ii++)
R[ii] = Rv1[ii]; // A format conversion!

delete [] Rv1;
}

else if (MASRversionInFile == 2) {
loadFromFile(R, "MASR", masrLen, F);
}

else {
fprintf(stderr, "tgStore::loadMASR()-- Failed to open '%s': version number mismatch; file=%d code=%d\n",
_name, MASRversionInFile, MASRversion);
exit(1);
}

merylutil::closeFile(F, _name);
}
Expand Down
33 changes: 27 additions & 6 deletions src/stores/tgStore.H
Original file line number Diff line number Diff line change
Expand Up @@ -98,13 +98,34 @@ public:
};

private:
struct tgStoreEntryV1 {
tgTigRecordV1 tigRecord;
uint64 unusedFlags : 12; // One whole bit for future use.
uint64 flushNeeded : 1; // If true, this MAR and associated tig are NOT saved to disk.
uint64 isDeleted : 1; // If true, this MAR has been deleted from the assembly.
uint64 svID : 10; // 10 -> 1024 versions (HARDCODED in tgStore.C)
uint64 fileOffset : 40; // 40 -> 1 TB file size; offset in file where MA is stored
};
struct tgStoreEntry {
tgTigRecord tigRecord;
uint64 unusedFlags : 12; // One whole bit for future use.
uint64 flushNeeded : 1; // If true, this MAR and associated tig are NOT saved to disk.
uint64 isDeleted : 1; // If true, this MAR has been deleted from the assembly.
uint64 svID : 10; // 10 -> 1024 versions (HARDCODED in tgStore.C)
uint64 fileOffset : 40; // 40 -> 1 TB file size; offset in file where MA is stored
tgTigRecord tigRecord;
uint64 unusedFlags : 12; // One whole bit for future use.
uint64 flushNeeded : 1; // If true, this MAR and associated tig are NOT saved to disk.
uint64 isDeleted : 1; // If true, this MAR has been deleted from the assembly.
uint64 svID : 10; // 10 -> 1024 versions (HARDCODED in tgStore.C)
uint64 fileOffset : 40; // 40 -> 1 TB file size; offset in file where MA is stored

tgStoreEntry &
operator=(const tgStoreEntryV1 &that) {
tigRecord = that.tigRecord; // Itself an overloaded operator=().

unusedFlags = that.unusedFlags;
flushNeeded = that.flushNeeded;
isDeleted = that.isDeleted;
svID = that.svID;
fileOffset = that.fileOffset;

return *this;
}
};

void writeTigToDisk(tgTig *ma, tgStoreEntry *maRecord);
Expand Down
Loading

0 comments on commit b871d94

Please sign in to comment.