diff --git a/CHANGES.md b/CHANGES.md index 6a84241..57bbcf8 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,7 @@ +### 3.3.0 +* adds `map_threads|mt` option to `bwa_mem.pl` to allow more control of parallel processing in one shot submission. +* adds `bwa_pl|l` option to `bwa_mem.pl` to allow preload of different malloc libraries. + ### 3.2.0 * Move from legacy kent bigwig manipulation code and to [`cgpBigWig`](https://github.com/cancerit/cgpBigWig) * Faster and handles the huge number of contigs in many new reference builds. diff --git a/bin/bwa_mem.pl b/bin/bwa_mem.pl index a86031a..7747aca 100755 --- a/bin/bwa_mem.pl +++ b/bin/bwa_mem.pl @@ -56,7 +56,7 @@ # register processes $threads->add_function('split', \&PCAP::Bwa::split_in); - $threads->add_function('bwamem', \&PCAP::Bwa::bwa_mem, exists $options->{'index'} ? 1 : &PCAP::Bwa::bwa_mem_max_cores); + $threads->add_function('bwamem', \&PCAP::Bwa::bwa_mem, exists $options->{'index'} ? 1 : $options->{'map_threads'}); PCAP::Bwa::mem_setup($options) if(!exists $options->{'process'} || $options->{'process'} eq 'setup'); @@ -89,6 +89,7 @@ sub setup { 'v|version' => \$opts{'v'}, 'j|jobs' => \$opts{'jobs'}, 't|threads=i' => \$opts{'threads'}, + 'mt|map_threads=i' => \$opts{'map_threads'}, 'r|reference=s' => \$opts{'reference'}, 'o|outdir=s' => \$opts{'outdir'}, 's|sample=s' => \$opts{'sample'}, @@ -99,6 +100,7 @@ sub setup { 'b|bwa=s' => \$opts{'bwa'}, 'c|cram' => \$opts{'cram'}, 'sc|scramble=s' => \$opts{'scramble'}, + 'l|bwa_pl=s' => \$opts{'bwa_pl'}, ) or pod2usage(2); pod2usage(-verbose => 1, -exitval => 0) if(defined $opts{'h'}); @@ -129,6 +131,9 @@ sub setup { delete $opts{'index'} unless(defined $opts{'index'}); delete $opts{'bwa'} unless(defined $opts{'bwa'}); delete $opts{'scramble'} unless(defined $opts{'scramble'}); + delete $opts{'bwa_pl'} unless(defined $opts{'bwa_pl'}); + + $opts{'map_threads'} = &PCAP::Bwa::bwa_mem_max_cores unless(defined $opts{'map_threads'}); PCAP::Cli::opt_requires_opts('scramble', 
\%opts, ['cram']); @@ -188,33 +193,39 @@ =head1 SYNOPSIS bwa_mem.pl [options] [file(s)...] Required parameters: - -outdir -o Folder to output result to. - -reference -r Path to reference genome file *.fa[.gz] - -sample -s Sample name to be applied to output file. - -threads -t Number of threads to use. [1] + -outdir -o Folder to output result to. + -reference -r Path to reference genome file *.fa[.gz] + -sample -s Sample name to be applied to output file. + -threads -t Number of threads to use. [1] Optional parameters: - -fragment -f Split input into fragements of X million repairs [10] - -nomarkdup -n Don't mark duplicates - -cram -c Output cram, see '-sc' - -scramble -sc Single quoted string of parameters to pass to Scramble when '-c' used - - '-I,-O' are used internally and should not be provided - -bwa -b Single quoted string of additional parameters to pass to BWA - - '-t,-p,-R' are used internally and should not be provided + -fragment -f Split input into fragments of X million repairs [10] + -nomarkdup -n Don't mark duplicates + -cram -c Output cram, see '-sc' + -scramble -sc Single quoted string of parameters to pass to Scramble when '-c' used + - '-I,-O' are used internally and should not be provided + -bwa -b Single quoted string of additional parameters to pass to BWA + - '-t,-p,-R' are used internally and should not be provided + -map_threads -mt Number of cores applied to each parallel BWA job when '-t' exceeds this value and '-i' is not in use [6] Targeted processing: - -process -p Only process this step then exit, optionally set -index - bwamem - only applicable if input is bam - mark - Run duplicate marking (-index N/A) - stats - Generates the *.bas file for the final BAM. + -process -p Only process this step then exit, optionally set -index + bwamem - only applicable if input is bam + mark - Run duplicate marking (-index N/A) + stats - Generates the *.bas file for the final BAM. + + -index -i Optionally restrict '-p' to single job + bwamem - 1.. 
- -index -i Optionally restrict '-p' to single job - bwamem - 1.. + Performance variables + -bwa_pl -l BWA runs ~8% quicker when using the tcmalloc library from + https://github.com/gperftools/ (assuming number of cores not exceeded) + If available specify the path to 'gperftools/lib/libtcmalloc_minimal.so'. Other: - -jobs -j For a parallel step report the number of jobs required - -help -h Brief help message. - -man -m Full documentation. + -jobs -j For a parallel step report the number of jobs required + -help -h Brief help message. + -man -m Full documentation. File list can be full file names or wildcard, e.g. diff --git a/docs.tar.gz b/docs.tar.gz index 30b61e0..7d9c6f5 100644 Binary files a/docs.tar.gz and b/docs.tar.gz differ diff --git a/lib/PCAP.pm b/lib/PCAP.pm index f31480c..ce60b44 100644 --- a/lib/PCAP.pm +++ b/lib/PCAP.pm @@ -27,7 +27,7 @@ use FindBin qw($Bin); use File::Which qw(which); # don't use autodie, only core perl in here -our $VERSION = '3.2.0'; +our $VERSION = '3.3.0'; our @EXPORT = qw($VERSION _which); const my $LICENSE => diff --git a/lib/PCAP/Bwa.pm b/lib/PCAP/Bwa.pm index 27c9d9a..7677209 100644 --- a/lib/PCAP/Bwa.pm +++ b/lib/PCAP/Bwa.pm @@ -235,10 +235,14 @@ sub bwa_mem { $rg_line = q{'}.$rg_line.q{'}; } - my $threads = $BWA_MEM_MAX_CORES; - $threads = $options->{'threads'} if($options->{'threads'} < $BWA_MEM_MAX_CORES); + my $threads = $options->{'map_threads'}; + $threads = $options->{'threads'} if($options->{'threads'} < $options->{'map_threads'}); - my $bwa = _which('bwa') || die "Unable to find 'bwa' in path"; + my $bwa = q{}; + if(exists $options->{'bwa_pl'}) { + $bwa .= 'LD_PRELOAD='.$options->{'bwa_pl'}.' '; + } + $bwa .= _which('bwa') || die "Unable to find 'bwa' in path"; $ENV{SHELL} = '/bin/bash'; # ensure bash to allow pipefail my $command = 'set -o pipefail; ';