From 82c2404281946e93042138346427548529c5eee1 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Fri, 26 Jun 2020 15:02:16 +0000 Subject: [PATCH 01/10] Expose fastcollate option --- CHANGES.md | 4 ++++ bin/bwa_mem.pl | 23 ++++++++++++++++++++++- lib/PCAP/Bwa.pm | 5 ++++- 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 5b61995..3e14be5 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,9 @@ # CHANGES +## NEXT + +* Expose option for "fastcollate" to allow proxy for <=5.0.5 processing methods. + ## 5.1.0 * Base image updated to Focal (Ubuntu 20.04). diff --git a/bin/bwa_mem.pl b/bin/bwa_mem.pl index 6f7afe4..096352d 100755 --- a/bin/bwa_mem.pl +++ b/bin/bwa_mem.pl @@ -136,6 +136,7 @@ sub setup { 'qf|mmqcfrac:f' => \$opts{'mmqcfrac'}, 'bm2|bwamem2' => \$opts{'bwamem2'}, 'd|dupmode:s' => \$opts{'dupmode'}, + 'fc|fastcollate' => \$opts{'fastcollate'}, 'ss|seqslice:i' => $opts{'seqslice'}, ) or pod2usage(2); @@ -178,6 +179,11 @@ sub setup { delete $opts{'mmqc'} unless(defined $opts{'mmqc'}); delete $opts{'csi'} unless(defined $opts{'csi'}); delete $opts{'bwamem2'} unless(defined $opts{'bwamem2'}); + delete $opts{'fastcollate'} unless(defined $opts{'fastcollate'}); + + if(defined $opts{'bwamem2'} && defined $opts{'fastcollate'}) { + pod2usage(-msg => "\nERROR: Options bwamem2 and fastcollate are incomptible.\n", -verbose => 1, -output => \*STDERR); + } PCAP::Cli::opt_requires_opts('scramble', \%opts, ['cram']); @@ -243,7 +249,7 @@ =head1 SYNOPSIS -threads -t Number of threads to use. [1] Optional parameters: - -bwamem2 -bm2 Use bwa-mem2 instead of bwa. + -bwamem2 -bm2 Use bwa-mem2 instead of bwa (experimental). 
-fragment -f Split input into fragments of X million repairs [10] - only applies to fastq[.gz] input -nomarkdup -n Don't mark duplicates [flag] @@ -261,6 +267,9 @@ =head1 SYNOPSIS - Please see 'bwa_mem.pl -m' -mmqcfrac -qf Mismatch fraction for -mmqc [0.05] -dupmode -d see "samtools markdup -m" [t] + -fastcollate -fc Paired with `-dupmode t` equivalent to PCAP-core<=5.0.5 + - Only relevant to BAM/CRAM input + - not compatible with bwamem2 Targeted processing: -process -p Only process this step then exit, optionally set -index @@ -360,6 +369,18 @@ =head2 OPTIONAL parameters Disables duplicate marking, switching bammarkduplicates2 for bammerge. +=item B<-dupmode> + +Switch between template and sequence based marking. See "samtools markdup" man page for more details + +=item B<-fastcollate> + +For BAM/CRAM, brings read pairs together but doesn't result in even distribution of read location during input to +bwa-mem. This can result in errors in read-pair placement and noise in data. This was part of the processing in +versions of PCAP-core <= 5.0.5. + +Not compaitible with bwa-mem2. + =item B<-csi> User CSI style index for final BAM file instead of default BAI. 
diff --git a/lib/PCAP/Bwa.pm b/lib/PCAP/Bwa.pm index a963fb9..819c92f 100644 --- a/lib/PCAP/Bwa.pm +++ b/lib/PCAP/Bwa.pm @@ -208,13 +208,16 @@ sub split_in { } # if bam|cram input else { + my $fastcollate = q{}; + $fastcollate = q{-f} if(exists $options->{fastcollate}); + my $helpers = $options->{threads_per_split}; my $collate_folder = File::Spec->catdir($options->{'tmp'}, 'collate', $index); make_path($collate_folder) unless(-d $collate_folder); my $samtools = _which('samtools') || die "Unable to find 'samtools' in path"; my $mmQcStrip = sprintf '%s --remove -l 0 -@ %d -i %s', _which('mmFlagModifier'), $helpers, $input->in; my $view = sprintf '%s view %s -bu -T %s -F 2816 -@ %d -', $samtools, $TAG_STRIP, $options->{'reference'}, $helpers; # leave - my $collate = sprintf '%s collate -Ou -@ %d - %s/collate', $samtools, $helpers, $collate_folder; + my $collate = sprintf '%s collate -Ou -@ %d %s - %s/collate', $samtools, $helpers, $fastcollate, $collate_folder; my $split = sprintf '%s split --output-fmt bam,level=1 -@ %d -u %s/unknown.bam -f %s/%%!_i.bam -', $samtools, $helpers, $split_folder, $split_folder; my $cmd = sprintf '%s | %s | %s | %s', $mmQcStrip, $view, $collate, $split; # treat as interleaved fastq From 46aa606846024fbe70ca78deb2152651e42e6eca Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Sat, 27 Jun 2020 12:41:02 +0000 Subject: [PATCH 02/10] Finalise changes, version and changelog --- CHANGES.md | 4 ++-- Dockerfile | 2 +- lib/PCAP.pm | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 3e14be5..46e6aa9 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,8 +1,8 @@ # CHANGES -## NEXT +## 5.2.0 -* Expose option for "fastcollate" to allow proxy for <=5.0.5 processing methods. +* Expose option for "fastcollate" to allow proxy for <=5.0.5 processing methods when combined with `-dupmode t`. 
## 5.1.0 diff --git a/Dockerfile b/Dockerfile index c112cfa..8265e02 100644 --- a/Dockerfile +++ b/Dockerfile @@ -62,7 +62,7 @@ FROM ubuntu:20.04 LABEL maintainer="cgphelp@sanger.ac.uk"\ uk.ac.sanger.cgp="Cancer, Ageing and Somatic Mutation, Wellcome Sanger Institute" \ - version="5.1.0" \ + version="5.2.0" \ description="pcap-core" ENV OPT /opt/wtsi-cgp diff --git a/lib/PCAP.pm b/lib/PCAP.pm index d75980c..817557c 100644 --- a/lib/PCAP.pm +++ b/lib/PCAP.pm @@ -28,7 +28,7 @@ use FindBin qw($Bin); use File::Which qw(which); # don't use autodie, only core perl in here -our $VERSION = '5.1.0'; +our $VERSION = '5.2.0'; our @EXPORT = qw($VERSION _which); const my $LICENSE => From 61541f90b320a63cd85bcd3c1f68d5a96bb14965 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Mon, 29 Jun 2020 08:12:37 +0000 Subject: [PATCH 03/10] Ensure bwamem2 + fastcollate is only a warning --- bin/bwa_mem.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/bwa_mem.pl b/bin/bwa_mem.pl index 096352d..e5c7ec3 100755 --- a/bin/bwa_mem.pl +++ b/bin/bwa_mem.pl @@ -182,7 +182,7 @@ sub setup { delete $opts{'fastcollate'} unless(defined $opts{'fastcollate'}); if(defined $opts{'bwamem2'} && defined $opts{'fastcollate'}) { - pod2usage(-msg => "\nERROR: Options bwamem2 and fastcollate are incomptible.\n", -verbose => 1, -output => \*STDERR); + warn "WARN: Use of options bwamem2 and fastcollate is suboptimal, proceeding but memory will be excessive.\n"); } PCAP::Cli::opt_requires_opts('scramble', \%opts, ['cram']); @@ -269,7 +269,7 @@ =head1 SYNOPSIS -dupmode -d see "samtools markdup -m" [t] -fastcollate -fc Paired with `-dupmode t` equivalent to PCAP-core<=5.0.5 - Only relevant to BAM/CRAM input - - not compatible with bwamem2 + - Avoid use with bwamem2 (memory explosion) Targeted processing: -process -p Only process this step then exit, optionally set -index From 40905a1c5e636358194e0092c67f21e88ecba685 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Mon, 29 Jun 2020 
09:49:56 +0000 Subject: [PATCH 04/10] Fix typo --- bin/bwa_mem.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/bwa_mem.pl b/bin/bwa_mem.pl index e5c7ec3..419e7ff 100755 --- a/bin/bwa_mem.pl +++ b/bin/bwa_mem.pl @@ -182,7 +182,7 @@ sub setup { delete $opts{'fastcollate'} unless(defined $opts{'fastcollate'}); if(defined $opts{'bwamem2'} && defined $opts{'fastcollate'}) { - warn "WARN: Use of options bwamem2 and fastcollate is suboptimal, proceeding but memory will be excessive.\n"); + warn "WARN: Use of options bwamem2 and fastcollate is suboptimal, proceeding but memory will be excessive.\n"; } PCAP::Cli::opt_requires_opts('scramble', \%opts, ['cram']); From 18144358a20353c6aac13061155e930b910babca Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Mon, 29 Jun 2020 10:57:29 +0000 Subject: [PATCH 05/10] fastcollate will use bamtofastq to retain legacy processing as an option --- lib/PCAP/Bwa.pm | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/lib/PCAP/Bwa.pm b/lib/PCAP/Bwa.pm index 819c92f..e5c4934 100644 --- a/lib/PCAP/Bwa.pm +++ b/lib/PCAP/Bwa.pm @@ -208,18 +208,25 @@ sub split_in { } # if bam|cram input else { - my $fastcollate = q{}; - $fastcollate = q{-f} if(exists $options->{fastcollate}); - my $helpers = $options->{threads_per_split}; my $collate_folder = File::Spec->catdir($options->{'tmp'}, 'collate', $index); make_path($collate_folder) unless(-d $collate_folder); my $samtools = _which('samtools') || die "Unable to find 'samtools' in path"; my $mmQcStrip = sprintf '%s --remove -l 0 -@ %d -i %s', _which('mmFlagModifier'), $helpers, $input->in; my $view = sprintf '%s view %s -bu -T %s -F 2816 -@ %d -', $samtools, $TAG_STRIP, $options->{'reference'}, $helpers; # leave - my $collate = sprintf '%s collate -Ou -@ %d %s - %s/collate', $samtools, $helpers, $fastcollate, $collate_folder; - my $split = sprintf '%s split --output-fmt bam,level=1 -@ %d -u %s/unknown.bam -f %s/%%!_i.bam -', $samtools, 
$helpers, $split_folder, $split_folder; - my $cmd = sprintf '%s | %s | %s | %s', $mmQcStrip, $view, $collate, $split; + my $collate_split; + if(exists $options->{fastcollate}) { + my $bamtofastq = _which('bamtofastq') || die "Unable to find 'bamtofastq' in path"; + $collate_split = sprintf '%s exclude=QCFAIL,SECONDARY,SUPPLEMENTARY tryoq=1 gz=1 level=1 outputperreadgroup=1 outputperreadgroupsuffixF=_i.fq outputperreadgroupsuffixF2=_i.fq T=%s outputdir=%s split=%s', + $bamtofastq, $collate_folder, $split_folder, + $fragment_size * $MILLION * $BAM_MULT; + } + else { + my $collate = sprintf '%s collate -Ou -@ %d - %s/collate', $samtools, $helpers, $collate_folder; + my $split = sprintf '%s split --output-fmt bam,level=1 -@ %d -u %s/unknown.bam -f %s/%%!_i.bam -', $samtools, $helpers, $split_folder, $split_folder; + $collate_split = sprintf '%s | %s', $collate, $split; + } + my $cmd = sprintf '%s | %s | %s | %s', $mmQcStrip, $view, $collate_split; # treat as interleaved fastq push @commands, 'set -o pipefail'; push @commands, $cmd; From 3cb9f4ea8e2a64f76bedc66ed695fc62a5f235b7 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Mon, 29 Jun 2020 12:00:59 +0000 Subject: [PATCH 06/10] handle error in split sprintf and different file types for split due to legacy processing --- lib/PCAP/Bwa.pm | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/lib/PCAP/Bwa.pm b/lib/PCAP/Bwa.pm index e5c4934..e5ff71f 100644 --- a/lib/PCAP/Bwa.pm +++ b/lib/PCAP/Bwa.pm @@ -226,7 +226,7 @@ sub split_in { my $split = sprintf '%s split --output-fmt bam,level=1 -@ %d -u %s/unknown.bam -f %s/%%!_i.bam -', $samtools, $helpers, $split_folder, $split_folder; $collate_split = sprintf '%s | %s', $collate, $split; } - my $cmd = sprintf '%s | %s | %s | %s', $mmQcStrip, $view, $collate_split; + my $cmd = sprintf '%s | %s | %s', $mmQcStrip, $view, $collate_split; # treat as interleaved fastq push @commands, 'set -o pipefail'; push @commands, $cmd; @@ -324,9 +324,15 @@ sub 
bwa_mem { } } else { - # bam/cram - my $tofastq = sprintf '%s fastq -@ %d -N %s', $tools{samtools}, $threads, $split; - $bwa = sprintf '%s | %s /dev/stdin', $tofastq, $bwa; + # due to legacy processing need to handle bam or fastq input here + if($split =~ m/\.fq\.gz$/) { + $bwa .= ' '.$split; + } + else { + # bam/cram + my $tofastq = sprintf '%s fastq -@ %d -N %s', $tools{samtools}, $threads, $split; + $bwa = sprintf '%s | %s /dev/stdin', $tofastq, $bwa; + } } my $sorted_bam_stub = $split; From 0c1c1fa3175105f68b0ddcd6c724db1d996bd30d Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Mon, 29 Jun 2020 12:43:36 +0000 Subject: [PATCH 07/10] correct temp file --- lib/PCAP/Bwa.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/PCAP/Bwa.pm b/lib/PCAP/Bwa.pm index e5ff71f..de59ae9 100644 --- a/lib/PCAP/Bwa.pm +++ b/lib/PCAP/Bwa.pm @@ -217,7 +217,7 @@ sub split_in { my $collate_split; if(exists $options->{fastcollate}) { my $bamtofastq = _which('bamtofastq') || die "Unable to find 'bamtofastq' in path"; - $collate_split = sprintf '%s exclude=QCFAIL,SECONDARY,SUPPLEMENTARY tryoq=1 gz=1 level=1 outputperreadgroup=1 outputperreadgroupsuffixF=_i.fq outputperreadgroupsuffixF2=_i.fq T=%s outputdir=%s split=%s', + $collate_split = sprintf '%s exclude=QCFAIL,SECONDARY,SUPPLEMENTARY tryoq=1 gz=1 level=1 outputperreadgroup=1 outputperreadgroupsuffixF=_i.fq outputperreadgroupsuffixF2=_i.fq T=%s/bamtofastq outputdir=%s split=%s', $bamtofastq, $collate_folder, $split_folder, $fragment_size * $MILLION * $BAM_MULT; } From 1a044d283d41645a05c9c543d926c689b560fc7c Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Mon, 29 Jun 2020 13:03:27 +0000 Subject: [PATCH 08/10] pattern out of bamtofastq not as expected --- lib/PCAP/Bwa.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/PCAP/Bwa.pm b/lib/PCAP/Bwa.pm index de59ae9..dd6c356 100644 --- a/lib/PCAP/Bwa.pm +++ b/lib/PCAP/Bwa.pm @@ -325,7 +325,7 @@ sub bwa_mem { } else { # due to legacy processing 
need to handle bam or fastq input here - if($split =~ m/\.fq\.gz$/) { + if($split =~ m/\.gz$/) { $bwa .= ' '.$split; } else { From 4b112af6bb89a25c7ef8bfc5d442161c88d8af7d Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Wed, 1 Jul 2020 08:35:23 +0000 Subject: [PATCH 09/10] Change to 'legacy' flag --- CHANGES.md | 2 +- bin/bwa_mem.pl | 21 ++++++++++----------- bin/merge_or_mark.pl | 5 ++++- lib/PCAP/Bam.pm | 30 ++++++++++++++++++++++++++++-- lib/PCAP/Bwa.pm | 2 +- 5 files changed, 44 insertions(+), 16 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 46e6aa9..ff54c75 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,7 +2,7 @@ ## 5.2.0 -* Expose option for "fastcollate" to allow proxy for <=5.0.5 processing methods when combined with `-dupmode t`. +* Expose option for "legacy" to allow for <=5.0.5 processing methods. ## 5.1.0 diff --git a/bin/bwa_mem.pl b/bin/bwa_mem.pl index 419e7ff..0bab214 100755 --- a/bin/bwa_mem.pl +++ b/bin/bwa_mem.pl @@ -136,7 +136,7 @@ sub setup { 'qf|mmqcfrac:f' => \$opts{'mmqcfrac'}, 'bm2|bwamem2' => \$opts{'bwamem2'}, 'd|dupmode:s' => \$opts{'dupmode'}, - 'fc|fastcollate' => \$opts{'fastcollate'}, + 'legacy' => \$opts{'legacy'}, 'ss|seqslice:i' => $opts{'seqslice'}, ) or pod2usage(2); @@ -179,10 +179,10 @@ sub setup { delete $opts{'mmqc'} unless(defined $opts{'mmqc'}); delete $opts{'csi'} unless(defined $opts{'csi'}); delete $opts{'bwamem2'} unless(defined $opts{'bwamem2'}); - delete $opts{'fastcollate'} unless(defined $opts{'fastcollate'}); + delete $opts{'legacy'} unless(defined $opts{'legacy'}); - if(defined $opts{'bwamem2'} && defined $opts{'fastcollate'}) { - warn "WARN: Use of options bwamem2 and fastcollate is suboptimal, proceeding but memory will be excessive.\n"; + if(defined $opts{'bwamem2'} && defined $opts{'legacy'}) { + warn "WARN: Use of options bwamem2 and legacy is suboptimal, proceeding but memory will be excessive.\n"; } PCAP::Cli::opt_requires_opts('scramble', \%opts, ['cram']); @@ -267,8 +267,9 @@ =head1 SYNOPSIS 
- Please see 'bwa_mem.pl -m' -mmqcfrac -qf Mismatch fraction for -mmqc [0.05] -dupmode -d see "samtools markdup -m" [t] - -fastcollate -fc Paired with `-dupmode t` equivalent to PCAP-core<=5.0.5 - - Only relevant to BAM/CRAM input + -legacy Equivalent to PCAP-core<=5.0.5 + - bamtofastq instead of samtools collate (for BAM/CRAM input) + - dupmode ignored as uses bammarkduplicates2 - Avoid use with bwamem2 (memory explosion) Targeted processing: @@ -373,13 +374,11 @@ =head2 OPTIONAL parameters Switch between template and sequence based marking. See "samtools markdup" man page for more details -=item B<-fastcollate> +=item B<-legacy> -For BAM/CRAM, brings read pairs together but doesn't result in even distribution of read location during input to -bwa-mem. This can result in errors in read-pair placement and noise in data. This was part of the processing in -versions of PCAP-core <= 5.0.5. +Processing equivalent to versions of PCAP-core <= 5.0.5 (bamtofastq + bammarkduplicates2). -Not compaitible with bwa-mem2. +Not recommended with bwamem2, as memory use becomes excessive. 
=item B<-csi> diff --git a/bin/merge_or_mark.pl b/bin/merge_or_mark.pl index a87c48f..f5b0cc2 100755 --- a/bin/merge_or_mark.pl +++ b/bin/merge_or_mark.pl @@ -90,6 +90,7 @@ sub setup { 'c|cram' => \$opts{'cram'}, 'sc|scramble=s' => \$opts{'scramble'}, 'd|dupmode:s' => \$opts{'dupmode'}, + 'legacy' => \$opts{'legacy'}, 'ss|seqslice:i' => $opts{'seqslice'}, ) or pod2usage(2); @@ -121,6 +122,7 @@ sub setup { delete $opts{'process'} unless(defined $opts{'process'}); delete $opts{'index'} unless(defined $opts{'index'}); + delete $opts{'legacy'} unless(defined $opts{'legacy'}); delete $opts{'scramble'}; delete $opts{'csi'} unless(defined $opts{'csi'}); if($opts{'qnamesort'} && !$opts{'nomarkdup'}){ @@ -183,7 +185,8 @@ =head1 SYNOPSIS -cram -c Output cram, see '-sc' [flag] -seqslice -ss seqs_per_slice for CRAM compression [samtools default: 10000] -scramble -sc DEPRECATED - -dupmode -d see "samtools markdup -m" [t] + -dupmode -d See "samtools markdup -m" [t] + -legacy Use legacy bammarkduplicates2, ignores '-dupmode' Targeted processing: -process -p Only process this step then exit diff --git a/lib/PCAP/Bam.pm b/lib/PCAP/Bam.pm index 19ba85b..4cce3f1 100644 --- a/lib/PCAP/Bam.pm +++ b/lib/PCAP/Bam.pm @@ -154,8 +154,21 @@ sub merge_or_mark_lanes { else { my $merge = sprintf q{%s merge -u -@ %d - %s}, $tools{samtools}, $helper_threads, $input_str; - my $markdup = sprintf q{%s markdup --mode %s --output-fmt bam,level=0 -S --include-fails -T %s -@ %d -f %s.met - -}, + my $markdup; + if(exists $options->{legacy}) { + my $mmflagmod = _which('mmFlagModifier') || die "Unable to find 'mmFlagModifier' in path"; + my $bammarkdups = _which('bammarkduplicates2') || die "Unable to find 'bammarkduplicates2' in path"; + + my $mmQcRemove = sprintf '%s --remove -l 0 -@ %d', $mmflagmod, $helper_threads; + my $bammarkdup = sprintf '%s tmpfile=%s M=%s.met level=0 markthreads=%d', $bammarkdups, $strmd_tmp, $marked, $helper_threads; + my $mmQcReplace = sprintf '%s --replace -l 0 -@ %d', 
$mmflagmod, $helper_threads; + + $markdup = sprintf q{%s | %s | %s}, $mmQcRemove, $bammarkdup, $mmQcReplace; + } + else { + $markdup = sprintf q{%s markdup --mode %s --output-fmt bam,level=0 -S --include-fails -T %s -@ %d -f %s.met - -}, $tools{samtools}, $options->{dupmode}, $strmd_tmp, $helper_threads, $marked; + } my $compress = sprintf q{%s view -T %s --output-fmt %s -@ %d -}, $tools{samtools}, $options->{reference}, $out_fmt, $helper_threads; my $idx = sprintf q{%s index -@ %d %s - %s.%s}, @@ -254,8 +267,21 @@ sub merge_and_mark_dup { else { my $merge = sprintf q{%s merge -u -@ %d - %s}, $tools{samtools}, $helper_threads, $input_str; - my $markdup = sprintf q{%s markdup --mode %s --output-fmt bam,level=0 -S --include-fails -T %s -@ %d -f %s.met - -}, + my $markdup; + if(exists $options->{legacy}) { + my $mmflagmod = _which('mmFlagModifier') || die "Unable to find 'mmFlagModifier' in path"; + my $bammarkdups = _which('bammarkduplicates2') || die "Unable to find 'bammarkduplicates2' in path"; + + my $mmQcRemove = sprintf '%s --remove -l 0 -@ %d', $mmflagmod, $helper_threads; + my $bammarkdup = sprintf '%s tmpfile=%s M=%s.met level=0 markthreads=%d', $bammarkdups, $strmd_tmp, $marked, $helper_threads; + my $mmQcReplace = sprintf '%s --replace -l 0 -@ %d', $mmflagmod, $helper_threads; + + $markdup = sprintf q{%s | %s | %s}, $mmQcRemove, $bammarkdup, $mmQcReplace; + } + else { + $markdup = sprintf q{%s markdup --mode %s --output-fmt bam,level=0 -S --include-fails -T %s -@ %d -f %s.met - -}, $tools{samtools}, $options->{dupmode}, $strmd_tmp, $helper_threads, $marked; + } my $compress = sprintf q{%s view -T %s --output-fmt %s -@ %d -}, $tools{samtools}, $options->{reference}, $out_fmt, $helper_threads; my $idx = sprintf q{%s index -@ %d %s - %s.%s}, diff --git a/lib/PCAP/Bwa.pm b/lib/PCAP/Bwa.pm index dd6c356..048d60a 100644 --- a/lib/PCAP/Bwa.pm +++ b/lib/PCAP/Bwa.pm @@ -215,7 +215,7 @@ sub split_in { my $mmQcStrip = sprintf '%s --remove -l 0 -@ %d -i %s', 
_which('mmFlagModifier'), $helpers, $input->in; my $view = sprintf '%s view %s -bu -T %s -F 2816 -@ %d -', $samtools, $TAG_STRIP, $options->{'reference'}, $helpers; # leave my $collate_split; - if(exists $options->{fastcollate}) { + if(exists $options->{legacy}) { my $bamtofastq = _which('bamtofastq') || die "Unable to find 'bamtofastq' in path"; $collate_split = sprintf '%s exclude=QCFAIL,SECONDARY,SUPPLEMENTARY tryoq=1 gz=1 level=1 outputperreadgroup=1 outputperreadgroupsuffixF=_i.fq outputperreadgroupsuffixF2=_i.fq T=%s/bamtofastq outputdir=%s split=%s', $bamtofastq, $collate_folder, $split_folder, From d641be69ed6bb62300ed6bd9f92baa41794afd48 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Thu, 2 Jul 2020 09:40:59 +0000 Subject: [PATCH 10/10] More details of the changes --- CHANGES.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index ff54c75..d3a7ab9 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,6 +3,9 @@ ## 5.2.0 * Expose option for "legacy" to allow for <=5.0.5 processing methods. + * `bamtofastq` when pulling reads from BAM/CRAM input. + * `bammarkduplicates2` for duplicate marking. + * Affects `bwa_mem.pl` and `merge_or_mark.pl` ## 5.1.0