Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/bwamem2 #54

Merged
merged 8 commits into from
Feb 14, 2020
Prev Previous commit
Next Next commit
bwa-mem2 and efficiency in merging steps
  • Loading branch information
keiranmraine committed Feb 8, 2020
commit 7b6f57afa4899c2ae1dec9312421a6fb60cfec25
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ liblzma5 \
libncurses5 \
p11-kit \
libcurl3 \
moreutils \
unattended-upgrades && \
unattended-upgrade -d -v && \
apt-get remove -yq unattended-upgrades && \
Expand Down
3 changes: 2 additions & 1 deletion INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ apt-get -yqq install \
lsof \
time \
libgd-perl \
psmisc
psmisc \
moreutils
```

### Amazon Linux AMI
Expand Down
11 changes: 4 additions & 7 deletions bin/bwa_mem.pl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

##########LICENCE##########
# PCAP - NGS reference implementations and helper code for the ICGC/TCGA Pan-Cancer Analysis Project
# Copyright (C) 2014-2018 ICGC PanCancer Project
# Copyright (C) 2014-2020 ICGC PanCancer Project
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
Expand Down Expand Up @@ -52,7 +52,7 @@
my $options = setup();

my $threads = PCAP::Threaded->new($options->{'threads'});
&PCAP::Threaded::disable_out_err if(!exists $options->{'index'} && $options->{'threads'} == 1);
&PCAP::Threaded::disable_out_err if(exists $options->{'index'});

# register processes
$threads->add_function('split', \&PCAP::Bwa::split_in);
Expand Down Expand Up @@ -98,8 +98,8 @@ sub setup {
'm|man' => \$opts{'m'},
'v|version' => \$opts{'v'},
'j|jobs' => \$opts{'jobs'},
't|threads=i' => \$opts{'threads'},
'mt|map_threads=i' => \$opts{'map_threads'},
't|threads:i' => \$opts{'threads'},
'mt|map_threads:i' => \$opts{'map_threads'},
'r|reference=s' => \$opts{'reference'},
'o|outdir=s' => \$opts{'outdir'},
's|sample=s' => \$opts{'sample'},
Expand All @@ -125,9 +125,6 @@ sub setup {
exit 0;
}

my $version = PCAP::Bwa::bwamem2_version();
die "bwa mem can only be used with bwa version 0.7+, the version found in path is: $version\n" unless(version->parse($version) >= version->parse('0.7.0'));

# then check for no args:
my $defined;
for(keys %opts) { $defined++ if(defined $opts{$_}); }
Expand Down
8 changes: 4 additions & 4 deletions bin/merge_or_mark.pl
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
{
my $options = setup();

PCAP::Bwa::mem_setup($options) if(!exists $options->{'process'} || $options->{'process'} eq 'setup');
PCAP::Bwa::mem_setup($options, 1) if(!exists $options->{'process'} || $options->{'process'} eq 'setup');

if(!exists $options->{'process'} || $options->{'process'} eq 'mark') {
PCAP::Bam::merge_or_mark_lanes($options, @{$options->{'raw_files'}});
Expand Down Expand Up @@ -75,7 +75,7 @@ sub setup {
GetOptions( 'h|help' => \$opts{'h'},
'm|man' => \$opts{'m'},
'v|version' => \$opts{'v'},
't|threads=i' => \$opts{'threads'},
't|threads:i' => \$opts{'threads'},
'r|reference=s' => \$opts{'reference'},
'o|outdir=s' => \$opts{'outdir'},
's|sample=s' => \$opts{'sample'},
Expand Down Expand Up @@ -144,7 +144,7 @@ sub setup {

=head1 NAME

merge_or_mark.pl - Merge multiple lanes generated by bwa_mem.pl into sample level file.
merge_or_mark.pl - Merge multiple lanes generated by bwa_mem.pl into sample level file

=head1 SYNOPSIS

Expand All @@ -154,9 +154,9 @@ =head1 SYNOPSIS
-outdir -o Folder to output result to.
-reference -r Path to reference genome file *.fa[.gz]
-sample -s Sample name to be applied to output file.
-threads -t Number of threads to use (max=4). [1]

Optional parameters:
-threads -t Number of threads to use (max=4). [1]
-nomarkdup -n Don't mark duplicates [flag]
-csi Use CSI index instead of BAI for BAM files [flag].
-cram -c Output cram, see '-sc' [flag]
Expand Down
9 changes: 5 additions & 4 deletions lib/PCAP/Bam.pm
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,14 @@ use PCAP::Threaded;

const my $BAMCOLLATE => q{(%s colsbs=268435456 collate=1 reset=1 exclude=SECONDARY,QCFAIL,SUPPLEMENTARY classes=F,F2 T=%s filename=%s level=1 > %s)};
const my $MISMATCHQC => q{| %s -l 0 -t %.2f -p };
const my $BAMBAM_DUP => q{%s level=0 %s | %s tmpfile=%s level=0 markthreads=%d M=%s.met %s| %s tmpfile=%s index=1 md5=1 numthreads=%d md5filename=%s.md5 indexfilename=%s.%s | tee %s | %s -o %s.bas -@ %d};
const my $BAMBAM_MERGE => q{%s %s tmpfile=%s level=0 %s| %s tmpfile=%s index=1 md5=1 numthreads=%d md5filename=%s.md5 indexfilename=%s.%s | tee %s | %s -o %s.bas -@ %d};
const my $BAMBAM_DUP => q{%s level=0 %s | %s tmpfile=%s level=0 markthreads=%d M=%s.met %s| pee '%s tmpfile=%s index=1 md5=1 numthreads=%d md5filename=%s.md5 indexfilename=%s.%s > %s' '%s -o %s.bas -@ %d'};
const my $BAMBAM_MERGE => q{%s %s tmpfile=%s level=0 %s| pee '%s tmpfile=%s index=1 md5=1 numthreads=%d md5filename=%s.md5 indexfilename=%s.%s > %s' '%s -o %s.bas -@ %d'};
const my $BAMBAM_DUP_CRAM => q{%s level=0 %s | %s tmpfile=%s M=%s.met markthreads=%s level=0 %s| %s -r %s -t %d -I bam -O cram %s | tee %s | %s index - %s.crai};
const my $BAMBAM_MERGE_CRAM => q{%s %s tmpfile=%s level=0 %s| %s -r %s -t %d -I bam -O cram %s | tee %s | %s index - %s.crai};

const my $LANE_BAMBAM_MERGE => q{%s %s tmpfile=%s level=0 | %s tmpfile=%s index=1 md5=1 numthreads=%d md5filename=%s.md5 indexfilename=%s.%s | tee %s | %s -o %s.bas -@ %d};
const my $LANE_BAMBAM_MERGE => q{%s %s tmpfile=%s level=0 | pee '%s tmpfile=%s index=1 md5=1 numthreads=%d md5filename=%s.md5 indexfilename=%s.%s > %s' '%s -o %s.bas -@ %d'};
const my $LANE_BAMBAM_MERGE_CRAM => q{%s %s tmpfile=%s level=0 | %s -r %s -t %d -I bam -O cram %s | tee %s | %s index - %s.crai};
const my $LANE_BAMBAM_DUP => q{%s level=0 %s | %s -l 0 -m | %s tmpfile=%s level=0 markthreads=%d M=%s.met | %s -l 0 -p | %s tmpfile=%s index=1 md5=1 numthreads=%d md5filename=%s.md5 indexfilename=%s.%s | tee %s | %s -o %s.bas -@ %d};
const my $LANE_BAMBAM_DUP => q{%s level=0 %s | %s -l 0 -m | %s tmpfile=%s level=0 markthreads=%d M=%s.met | %s -l 0 -p | pee '%s tmpfile=%s index=1 md5=1 numthreads=%d md5filename=%s.md5 indexfilename=%s.%s > %s' '%s -o %s.bas -@ %d'};
const my $LANE_BAMBAM_DUP_CRAM => q{%s level=0 %s | %s -l 0 -m | %s tmpfile=%s level=0 markthreads=%d M=%s.met | %s -l 0 -p | %s -r %s -t %d -I bam -O cram %s | tee %s | %s index - %s.crai};

const my $CRAM_CHKSUM => q{md5sum %s | perl -ne '/^(\S+)/; print "$1";' > %s.md5};
Expand Down Expand Up @@ -114,6 +114,7 @@ sub merge_or_mark_lanes {
my @commands;
return $marked if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), 0);
my $helper_threads = $options->{'threads'}-1;
$helper_threads = 1 if($helper_threads < 1);

my $input_str = ' I='.join(' I=', sort @sorted_bams);

Expand Down
8 changes: 4 additions & 4 deletions lib/PCAP/Bwa.pm
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package PCAP::Bwa;

##########LICENCE##########
# PCAP - NGS reference implementations and helper code for the ICGC/TCGA Pan-Cancer Analysis Project
# Copyright (C) 2014-2018 ICGC PanCancer Project
# Copyright (C) 2014-2020 ICGC PanCancer Project
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
Expand Down Expand Up @@ -62,7 +62,7 @@ sub bwamem2_version {
{
my ($stdout, $stderr, $exit) = capture{ system("$bwa version"); };
chomp $stdout;
($version) = $stdout =~ /([[:digit:]\.]+)/m;
$version = $stdout;
}
return $version;
}
Expand All @@ -79,7 +79,7 @@ sub bwa_version {
}

sub mem_setup {
my $options = shift;
my ($options, $skip_mmqc_check) = @_;
if($options->{'reference'} =~ m/\.gz$/) {
my $tmp_ref = $options->{'reference'};
$tmp_ref =~ s/\.gz$//;
Expand All @@ -94,7 +94,7 @@ sub mem_setup {
}
# do some checking to ensure input BAM/CRAM hasn't been through mismatchQc
# if it has check for use of at least bammaskflags
PCAP::Bam::mismatchQc_checks($options->{'raw_files'});
PCAP::Bam::mismatchQc_checks($options->{'raw_files'}) unless($skip_mmqc_check);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If merging pre-generated lanes, we don't want to block data that has already been processed by mmQc. This step is reused by both mapping and merging, hence the distinction.

return 1;
}

Expand Down