Skip to content

Commit

Permalink
Merge branch 'release/5.2.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
keiranmraine committed Jul 2, 2020
2 parents a2bcb9d + d641be6 commit 0b5d9ca
Show file tree
Hide file tree
Showing 7 changed files with 84 additions and 12 deletions.
7 changes: 7 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# CHANGES

## 5.2.0

* Expose option for "legacy" to allow for <=5.0.5 processing methods.
* `bamtofastq` when pulling reads from BAM/CRAM input.
* `bammarkduplicates2` for duplicate marking.
* Affects `bwa_mem.pl` and `merge_or_mark.pl`

## 5.1.0

* Base image updated to Focal (Ubuntu 20.04).
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ FROM ubuntu:20.04

LABEL maintainer="cgphelp@sanger.ac.uk"\
uk.ac.sanger.cgp="Cancer, Ageing and Somatic Mutation, Wellcome Sanger Institute" \
version="5.1.0" \
version="5.2.0" \
description="pcap-core"

ENV OPT /opt/wtsi-cgp
Expand Down
22 changes: 21 additions & 1 deletion bin/bwa_mem.pl
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ sub setup {
'qf|mmqcfrac:f' => \$opts{'mmqcfrac'},
'bm2|bwamem2' => \$opts{'bwamem2'},
'd|dupmode:s' => \$opts{'dupmode'},
'legacy' => \$opts{'legacy'},
'ss|seqslice:i' => $opts{'seqslice'},
) or pod2usage(2);

Expand Down Expand Up @@ -178,6 +179,11 @@ sub setup {
delete $opts{'mmqc'} unless(defined $opts{'mmqc'});
delete $opts{'csi'} unless(defined $opts{'csi'});
delete $opts{'bwamem2'} unless(defined $opts{'bwamem2'});
delete $opts{'legacy'} unless(defined $opts{'legacy'});

if(defined $opts{'bwamem2'} && defined $opts{'legacy'}) {
warn "WARN: Use of options bwamem2 and legacy is suboptimal, proceeding but memory will be excessive.\n";
}

PCAP::Cli::opt_requires_opts('scramble', \%opts, ['cram']);

Expand Down Expand Up @@ -243,7 +249,7 @@ =head1 SYNOPSIS
-threads -t Number of threads to use. [1]
Optional parameters:
-bwamem2 -bm2 Use bwa-mem2 instead of bwa.
-bwamem2 -bm2 Use bwa-mem2 instead of bwa (experimental).
-fragment -f Split input into fragments of X million repairs [10]
- only applies to fastq[.gz] input
-nomarkdup -n Don't mark duplicates [flag]
Expand All @@ -261,6 +267,10 @@ =head1 SYNOPSIS
- Please see 'bwa_mem.pl -m'
-mmqcfrac -qf Mismatch fraction for -mmqc [0.05]
-dupmode -d see "samtools markdup -m" [t]
-legacy Equivalent to PCAP-core<=5.0.5
- bamtofastq instead of samtools collate (for BAM/CRAM input)
- dupmode ignored as uses bammarkduplicates2
- Avoid use with bwamem2 (memory explosion)
Targeted processing:
-process -p Only process this step then exit, optionally set -index
Expand Down Expand Up @@ -360,6 +370,16 @@ =head2 OPTIONAL parameters
Disables duplicate marking, switching bammarkduplicates2 for bammerge.
=item B<-dupmode>
Switch between template and sequence based marking. See "samtools markdup" man page for more details
=item B<-legacy>
Processing equivalent to versions of PCAP-core <= 5.0.5 (bamtofastq + bammarkduplicates2)
Not recommended with bwamem2 - memory explosions.
=item B<-csi>
User CSI style index for final BAM file instead of default BAI.
Expand Down
5 changes: 4 additions & 1 deletion bin/merge_or_mark.pl
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ sub setup {
'c|cram' => \$opts{'cram'},
'sc|scramble=s' => \$opts{'scramble'},
'd|dupmode:s' => \$opts{'dupmode'},
'legacy' => \$opts{'legacy'},
'ss|seqslice:i' => $opts{'seqslice'},
) or pod2usage(2);

Expand Down Expand Up @@ -121,6 +122,7 @@ sub setup {

delete $opts{'process'} unless(defined $opts{'process'});
delete $opts{'index'} unless(defined $opts{'index'});
delete $opts{'legacy'} unless(defined $opts{'legacy'});
delete $opts{'scramble'};
delete $opts{'csi'} unless(defined $opts{'csi'});
if($opts{'qnamesort'} && !$opts{'nomarkdup'}){
Expand Down Expand Up @@ -183,7 +185,8 @@ =head1 SYNOPSIS
-cram -c Output cram, see '-sc' [flag]
-seqslice -ss seqs_per_slice for CRAM compression [samtools default: 10000]
-scramble -sc DEPRECATED
-dupmode -d see "samtools markdup -m" [t]
-dupmode -d See "samtools markdup -m" [t]
-legacy Use legacy bammarkduplicates2, ignores '-dupmode'
Targeted processing:
-process -p Only process this step then exit
Expand Down
2 changes: 1 addition & 1 deletion lib/PCAP.pm
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use FindBin qw($Bin);
use File::Which qw(which);
# don't use autodie, only core perl in here

our $VERSION = '5.1.0';
our $VERSION = '5.2.0';
our @EXPORT = qw($VERSION _which);

const my $LICENSE =>
Expand Down
30 changes: 28 additions & 2 deletions lib/PCAP/Bam.pm
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,21 @@ sub merge_or_mark_lanes {
else {
my $merge = sprintf q{%s merge -u -@ %d - %s},
$tools{samtools}, $helper_threads, $input_str;
my $markdup = sprintf q{%s markdup --mode %s --output-fmt bam,level=0 -S --include-fails -T %s -@ %d -f %s.met - -},
my $markdup;
if(exists $options->{legacy}) {
my $mmflagmod = _which('mmFlagModifier') || die "Unable to find 'mmFlagModifier' in path";
my $bammarkdups = _which('bammarkduplicates2') || die "Unable to find 'bammarkduplicates2' in path";

my $mmQcRemove = sprintf '%s --remove -l 0 -@ %d', $mmflagmod, $helper_threads;
my $bammarkdup = sprintf '%s tmpfile=%s M=%s.met level=0 markthreads=%d', $bammarkdups, $strmd_tmp, $marked, $helper_threads;
my $mmQcReplace = sprintf '%s --replace -l 0 -@ %d', $mmflagmod, $helper_threads;

$markdup = sprintf q{%s | %s | %s}, $mmQcRemove, $bammarkdup, $mmQcReplace;
}
else {
$markdup = sprintf q{%s markdup --mode %s --output-fmt bam,level=0 -S --include-fails -T %s -@ %d -f %s.met - -},
$tools{samtools}, $options->{dupmode}, $strmd_tmp, $helper_threads, $marked;
}
my $compress = sprintf q{%s view -T %s --output-fmt %s -@ %d -},
$tools{samtools}, $options->{reference}, $out_fmt, $helper_threads;
my $idx = sprintf q{%s index -@ %d %s - %s.%s},
Expand Down Expand Up @@ -254,8 +267,21 @@ sub merge_and_mark_dup {
else {
my $merge = sprintf q{%s merge -u -@ %d - %s},
$tools{samtools}, $helper_threads, $input_str;
my $markdup = sprintf q{%s markdup --mode %s --output-fmt bam,level=0 -S --include-fails -T %s -@ %d -f %s.met - -},
my $markdup;
if(exists $options->{legacy}) {
my $mmflagmod = _which('mmFlagModifier') || die "Unable to find 'mmFlagModifier' in path";
my $bammarkdups = _which('bammarkduplicates2') || die "Unable to find 'bammarkduplicates2' in path";

my $mmQcRemove = sprintf '%s --remove -l 0 -@ %d', $mmflagmod, $helper_threads;
my $bammarkdup = sprintf '%s tmpfile=%s M=%s.met level=0 markthreads=%d', $bammarkdups, $strmd_tmp, $marked, $helper_threads;
my $mmQcReplace = sprintf '%s --replace -l 0 -@ %d', $mmflagmod, $helper_threads;

$markdup = sprintf q{%s | %s | %s}, $mmQcRemove, $bammarkdup, $mmQcReplace;
}
else {
$markdup = sprintf q{%s markdup --mode %s --output-fmt bam,level=0 -S --include-fails -T %s -@ %d -f %s.met - -},
$tools{samtools}, $options->{dupmode}, $strmd_tmp, $helper_threads, $marked;
}
my $compress = sprintf q{%s view -T %s --output-fmt %s -@ %d -},
$tools{samtools}, $options->{reference}, $out_fmt, $helper_threads;
my $idx = sprintf q{%s index -@ %d %s - %s.%s},
Expand Down
28 changes: 22 additions & 6 deletions lib/PCAP/Bwa.pm
Original file line number Diff line number Diff line change
Expand Up @@ -214,9 +214,19 @@ sub split_in {
my $samtools = _which('samtools') || die "Unable to find 'samtools' in path";
my $mmQcStrip = sprintf '%s --remove -l 0 -@ %d -i %s', _which('mmFlagModifier'), $helpers, $input->in;
my $view = sprintf '%s view %s -bu -T %s -F 2816 -@ %d -', $samtools, $TAG_STRIP, $options->{'reference'}, $helpers; # leave
my $collate = sprintf '%s collate -Ou -@ %d - %s/collate', $samtools, $helpers, $collate_folder;
my $split = sprintf '%s split --output-fmt bam,level=1 -@ %d -u %s/unknown.bam -f %s/%%!_i.bam -', $samtools, $helpers, $split_folder, $split_folder;
my $cmd = sprintf '%s | %s | %s | %s', $mmQcStrip, $view, $collate, $split;
my $collate_split;
if(exists $options->{legacy}) {
my $bamtofastq = _which('bamtofastq') || die "Unable to find 'bamtofastq' in path";
$collate_split = sprintf '%s exclude=QCFAIL,SECONDARY,SUPPLEMENTARY tryoq=1 gz=1 level=1 outputperreadgroup=1 outputperreadgroupsuffixF=_i.fq outputperreadgroupsuffixF2=_i.fq T=%s/bamtofastq outputdir=%s split=%s',
$bamtofastq, $collate_folder, $split_folder,
$fragment_size * $MILLION * $BAM_MULT;
}
else {
my $collate = sprintf '%s collate -Ou -@ %d - %s/collate', $samtools, $helpers, $collate_folder;
my $split = sprintf '%s split --output-fmt bam,level=1 -@ %d -u %s/unknown.bam -f %s/%%!_i.bam -', $samtools, $helpers, $split_folder, $split_folder;
$collate_split = sprintf '%s | %s', $collate, $split;
}
my $cmd = sprintf '%s | %s | %s', $mmQcStrip, $view, $collate_split;
# treat as interleaved fastq
push @commands, 'set -o pipefail';
push @commands, $cmd;
Expand Down Expand Up @@ -314,9 +324,15 @@ sub bwa_mem {
}
}
else {
# bam/cram
my $tofastq = sprintf '%s fastq -@ %d -N %s', $tools{samtools}, $threads, $split;
$bwa = sprintf '%s | %s /dev/stdin', $tofastq, $bwa;
# due to legacy processing need to handle bam or fastq input here
if($split =~ m/\.gz$/) {
$bwa .= ' '.$split;
}
else {
# bam/cram
my $tofastq = sprintf '%s fastq -@ %d -N %s', $tools{samtools}, $threads, $split;
$bwa = sprintf '%s | %s /dev/stdin', $tofastq, $bwa;
}
}

my $sorted_bam_stub = $split;
Expand Down

0 comments on commit 0b5d9ca

Please sign in to comment.