diff --git a/CHANGES.md b/CHANGES.md
index f6bca2e..5f77972 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,3 +1,10 @@
+### 1.11.0
+* bam_stats - new rna switch to give more appropriate insert size stats
+* bam_stats - more robust handling of optional RG header entries
+* bam_stats - allows streaming IO (thanks to @jenniferliddle)
+* bwa_mem.pl - Handle `'` in RG header line/IDs
+* Generally improved version handling and updated versions of some tools.
+
 ### 1.9.1
 * Changed final log folder to include sample name and analysis type, prevents clash when lots of data to same output loc.
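The `rna` behaviour summarised in the first bullet is only visible in this patch as flag plumbing plus the bam_stats usage text further down ("ignores anything outside ± ('sd'*standard_dev) of the mean in calculating a new mean"); the recalculation itself is not shown here. A rough Perl sketch of that rule, with hypothetical names and an assumed cut-off of 2 standard deviations:

```perl
#!/usr/bin/env perl
# Sketch only: approximates the trimming described by the --rna usage text.
# The helper name and the 2*sd cut-off are assumptions, not code from bam_stats.
use strict;
use warnings;
use List::Util qw(sum);

sub trimmed_insert_mean {
  my ($inserts, $n_sd) = @_;                        # arrayref of insert sizes
  my $mean = sum(@{$inserts}) / scalar @{$inserts};
  my $sd   = sqrt(sum(map { ($_ - $mean)**2 } @{$inserts}) / scalar @{$inserts});
  my @kept = grep { abs($_ - $mean) <= $n_sd * $sd } @{$inserts};
  return @kept ? sum(@kept) / scalar @kept : $mean; # recompute on the kept values
}

# a single distant mate inflates the naive mean; the trimmed mean ignores it
printf "%.1f\n", trimmed_insert_mean([195, 198, 200, 200, 201, 202, 203, 204, 205, 206, 50_000], 2);
```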
diff --git a/MYMETA.json b/MYMETA.json
deleted file mode 100644
index 5e35ff9..0000000
--- a/MYMETA.json
+++ /dev/null
@@ -1,61 +0,0 @@
-{
-   "abstract" : "unknown",
-   "author" : [
-      "unknown"
-   ],
-   "dynamic_config" : 0,
-   "generated_by" : "ExtUtils::MakeMaker version 6.68, CPAN::Meta::Converter version 2.142690",
-   "license" : [
-      "gpl_2"
-   ],
-   "meta-spec" : {
-      "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
-      "version" : "2"
-   },
-   "name" : "PCAP",
-   "no_index" : {
-      "directory" : [
-         "t",
-         "inc"
-      ]
-   },
-   "prereqs" : {
-      "build" : {
-         "requires" : {
-            "ExtUtils::MakeMaker" : "0"
-         }
-      },
-      "configure" : {
-         "requires" : {
-            "ExtUtils::MakeMaker" : "0"
-         }
-      },
-      "runtime" : {
-         "requires" : {
-            "Bio::DB::Sam" : "1.39",
-            "Bio::Root::Version" : "1.006923",
-            "Capture::Tiny" : "0.24",
-            "Config::IniFiles" : "2.83",
-            "Const::Fast" : "0.014",
-            "Data::UUID" : "1.219",
-            "Devel::Cover" : "1.09",
-            "File::Which" : "0.05",
-            "GD" : "2.52",
-            "IPC::System::Simple" : "1.25",
-            "List::Util" : "1.38",
-            "Math::Gradient" : "0.04",
-            "Module::Build" : "0.42",
-            "Pod::Coverage" : "0.23",
-            "Proc::PID::File" : "1.27",
-            "Proc::ProcessTable" : "0.5",
-            "Sub::Exporter::Progressive" : "0.001011",
-            "Term::UI" : "0.42",
-            "Test::Fatal" : "0.013",
-            "Try::Tiny" : "0.19",
-            "XML::Simple" : "2.2"
-         }
-      }
-   },
-   "release_status" : "stable",
-   "version" : "v1.10.0"
-}
diff --git a/MYMETA.yml b/MYMETA.yml
deleted file mode 100644
index 3e62035..0000000
--- a/MYMETA.yml
+++ /dev/null
@@ -1,42 +0,0 @@
----
-abstract: unknown
-author:
-  - unknown
-build_requires:
-  ExtUtils::MakeMaker: '0'
-configure_requires:
-  ExtUtils::MakeMaker: '0'
-dynamic_config: 0
-generated_by: 'ExtUtils::MakeMaker version 6.68, CPAN::Meta::Converter version 2.142690'
-license: gpl
-meta-spec:
-  url: http://module-build.sourceforge.net/META-spec-v1.4.html
-  version: '1.4'
-name: PCAP
-no_index:
-  directory:
-    - t
-    - inc
-requires:
-  Bio::DB::Sam: '1.39'
-  Bio::Root::Version: '1.006923'
-  Capture::Tiny: '0.24'
-  Config::IniFiles: '2.83'
-  Const::Fast: '0.014'
-  Data::UUID: '1.219'
-  Devel::Cover: '1.09'
-  File::Which: '0.05'
-  GD: '2.52'
-  IPC::System::Simple: '1.25'
-  List::Util: '1.38'
-  Math::Gradient: '0.04'
-  Module::Build: '0.42'
-  Pod::Coverage: '0.23'
-  Proc::PID::File: '1.27'
-  Proc::ProcessTable: '0.5'
-  Sub::Exporter::Progressive: '0.001011'
-  Term::UI: '0.42'
-  Test::Fatal: '0.013'
-  Try::Tiny: '0.19'
-  XML::Simple: '2.2'
-version: v1.10.0
diff --git a/Makefile.PL b/Makefile.PL
index edf419e..ad346ee 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -61,6 +61,7 @@ WriteMakefile(
     'Term::UI' => 0.42, # currently in core but scheduled for removal 5.17, no alternative recommended
     'Sub::Exporter::Progressive' => 0.001011,
     'XML::Simple' => 2.20,
+    'version' => 0.9912,
   }
 );
diff --git a/bin/gnos_pull.pl b/bin/gnos_pull.pl
index 82b9d19..693c01a 100755
--- a/bin/gnos_pull.pl
+++ b/bin/gnos_pull.pl
@@ -49,7 +49,7 @@ const my @ANALYSIS_TYPES => (qw(ALIGNMENTS CALLS));
 const my @AVAILABLE_COMPOSITE_FILTERS => (qw(caller max_dataset_GB multi_tumour sanger_version broad_version dkfz_embl_version jamboree_approved manual_donor_blacklist));
 const my $DEFAULT_URL => 'http://pancancer.info/gnos_metadata/latest';
-const my $GTDL_COMMAND => '%s%s --max-children 3 --rate-limit 200 -v -c %s -d %scghub/data/analysis/download/%s -p %s';
+const my $GTDL_COMMAND => '%s%s --max-children 3 --rate-limit 200 -vv -c %s -d %scghub/data/analysis/download/%s -p %s';
 
 {
   my $options = option_builder();
@@ -139,28 +139,35 @@ sub pull_data {
   make_path($symbase) unless(-e $symbase);
 
   my $code_ref;
+  my $check_ref;
   if($options->{'analysis'} eq 'CALLS') {
     $code_ref = \&pull_calls;
   }
   elsif($options->{'analysis'} eq 'ALIGNMENTS') {
+    $check_ref = \&check_alignments;
     $code_ref = \&pull_alignments;
   }
 
   my $thread_count = $options->{'threads'};
-  if($thread_count > 1) {
-    while(@{$to_process} > 0) {
+
+  while(@{$to_process} > 0) {
+    my $donor = shift @{$to_process};
+    my $donor_uniq = $donor->{'donor_unique_id'};
+    # is PROJECT::donor, so convert to folder
+    $donor_uniq =~ s|[:]{2}|/|;
+    my $donor_base = "$symbase/$donor_uniq";
+    my $orig_base = "$outbase/$donor_uniq";
+    make_path($orig_base) unless(-e $orig_base);
+
+    if(defined $check_ref) {
+      next if($check_ref->($options, $donor, $orig_base, $donor_base) == 0);
+    }
+
+    warn "Submitting: $donor->{donor_unique_id}\n";
+    if($thread_count > 1) {
       if(threads->list(threads::all) < $thread_count) {
-        my $donor = shift @{$to_process};
-        my $donor_uniq = $donor->{'donor_unique_id'};
-        # is PROJECT::donor, so convert to folder
-        $donor_uniq =~ s|[:]{2}|/|;
-        my $donor_base = "$symbase/$donor_uniq";
-        my $orig_base = "$outbase/$donor_uniq";
-        make_path($orig_base) unless(-e $orig_base);
-        warn "Submitted: $donor->{donor_unique_id}\n";
         threads->create($code_ref, $options, $donor, $orig_base, $donor_base);
-        # don't sleep if not full yet
         next if(threads->list(threads::all) < $thread_count);
       }
@@ -170,6 +177,11 @@ sub pull_data {
       if(my $err = $thr->error) { die "Thread error: $err\n"; }
     }
   }
+  else {
+    $code_ref->($options, $donor, $orig_base, $donor_base);
+  }
+  }
+  if($thread_count > 1) {
   # last gasp for any remaining threads
   sleep 1 while(threads->list(threads::running) > 0);
   for my $thr(threads->list(threads::joinable)) {
@@ -177,17 +189,6 @@ sub pull_data {
       if(my $err = $thr->error) { die "Thread error: $err\n"; }
     }
   }
-  else {
-    for my $donor(@{$to_process}) {
-      my $donor_uniq = $donor->{'donor_unique_id'};
-      # is PROJECT::donor, so convert to folder
-      $donor_uniq =~ s|[:]{2}|/|;
-      my $donor_base = "$symbase/$donor_uniq";
-      my $orig_base = "$outbase/$donor_uniq";
-      make_path($orig_base) unless(-e $orig_base);
-      $code_ref->($options, $donor, $orig_base, $donor_base);
-    }
-  }
 }
 
 sub check_or_create_symlink {
@@ -208,6 +209,51 @@ sub check_or_create_symlink {
   return 1;
 }
 
+sub check_alignments {
+  my ($options, $donor, $outbase, $donor_base) = @_;
+  warn "Checking $donor->{donor_unique_id}\n";
+  my $to_do = 0;
+  # for normal:
+  $to_do += check_bam($options, $donor->{'donor_unique_id'}, $donor->{'normal_alignment_status'}, $outbase, $donor_base, 'normal');
+
+  # for tumour
+  for my $tumour_data(@{$donor->{'tumor_alignment_status'}}) {
+    $to_do += check_bam($options, $donor->{'donor_unique_id'}, $tumour_data, $outbase, $donor_base, 'tumour');
+  }
+  return $to_do;
+}
+
+sub check_bam {
+  my ($options, $donor_id, $bam_data, $outbase, $donor_base, $type) = @_;
+  my $repo = select_repo($options, $bam_data->{'aligned_bam'}->{'gnos_repo'});
+  unless(exists $options->{'keys'}->{$repo}) {
+    warn sprintf "Skipping %s BAM for Donor %s - No permission key for repo %s", $type, $donor_id, $repo;
+    return 0;
+  }
+
+  my $gnos_id = $bam_data->{'aligned_bam'}->{'gnos_id'};
+
+  my $f_base = $outbase.'/'.$gnos_id;
+  my $success = $f_base.'/.success.t';
+
+  my $sample_base = "$donor_base/$type";
+
+  make_path($sample_base) unless(-e $sample_base);
+  my $aliq_id = $bam_data->{'aliquot_id'};
+  my $orig_bam = $f_base.'/'.$bam_data->{'aligned_bam'}->{'bam_file_name'};
+  my $sym_bam = $sample_base.'/'.$bam_data->{'aliquot_id'}.'.bam';
+
+  if(-e $success) {
+    check_or_create_symlink($orig_bam, $sym_bam);
+    check_or_create_symlink($orig_bam.'.bai', $sym_bam.'.bai');
+    create_bas($repo, $gnos_id, $sym_bam);
+    return 0;
+  }
+
+  return 0 if($options->{'symlinks'});
+  return 1;
+}
+
 sub pull_alignments {
   my ($options, $donor, $outbase, $donor_base) = @_;
 
@@ -222,7 +268,6 @@ sub pull_bam {
   my ($options, $donor_id, $bam_data, $outbase, $donor_base, $type) = @_;
-  my $sample_base = "$donor_base/$type";
 
   my $repo = select_repo($options, $bam_data->{'aligned_bam'}->{'gnos_repo'});
   unless(exists $options->{'keys'}->{$repo}) {
@@ -231,15 +276,11 @@ sub pull_bam {
   }
 
   my $gnos_id = $bam_data->{'aligned_bam'}->{'gnos_id'};
-  my $download = sprintf $GTDL_COMMAND, $options->{'gtdownload'},
-                                        $options->{'gt_timeout'},
-                                        $options->{'keys'}->{$repo},
-                                        $repo,
-                                        $gnos_id,
-                                        $outbase;
 
   my $f_base = $outbase.'/'.$gnos_id;
   my $success = $f_base.'/.success.t';
+  my $sample_base = "$donor_base/$type";
+
   make_path($sample_base) unless(-e $sample_base);
   my $aliq_id = $bam_data->{'aliquot_id'};
   my $orig_bam = $f_base.'/'.$bam_data->{'aligned_bam'}->{'bam_file_name'};
@@ -253,6 +294,13 @@ sub pull_bam {
   }
 
   return if($options->{'symlinks'});
+  my $download = sprintf $GTDL_COMMAND, $options->{'gtdownload'},
+                                        $options->{'gt_timeout'},
+                                        $options->{'keys'}->{$repo},
+                                        $repo,
+                                        $gnos_id,
+                                        $outbase;
+
   my $out_file = "$f_base.out.log";
   my $err_file = "$f_base.err.log";
   $download .= " > $f_base.out.log";
@@ -286,7 +334,7 @@ sub create_bas {
   my ($repo, $gnos_id, $sym_bam) = @_;
   my $bas_file = $sym_bam.'.bas';
   unlink $bas_file if(-l $bas_file);
-  return if(-s $bas_file);
+  return 1 if(-s $bas_file);
   # pull the bas file, done here to handle back fill of this data
   my $get_bas = sprintf '%s %s/xml_to_bas.pl -d %scghub/metadata/analysisFull/%s -o %s -b %s',
                         $^X,
@@ -325,6 +373,7 @@ sub pull_calls {
   my @callers = @{$donor->{'flags'}->{'variant_calling_performed'}};
   # this should loop over the data found in 'is_sanger_vcf_in_jamboree' once modified to be array
   for my $caller(@callers) {
+    next if($caller eq 'embl' || $caller eq 'dkfz');
    next if(exists $options->{'COMPOSITE_FILTERS'}->{'caller'} && index(','.$options->{'COMPOSITE_FILTERS'}->{'caller'}.',', ','.$caller.',') == -1);
     next unless($donor->{'flags'}->{'is_'.$caller.'_variant_calling_performed'});
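The reworked `pull_data` above now checks each donor before dispatching it, whether the pull then runs serially or on worker threads. A condensed, self-contained Perl sketch of that shape; the check/pull callbacks are stand-ins, not the real GNOS logic:

```perl
#!/usr/bin/env perl
# Condensed sketch of the reshaped dispatch: check first, then pull, either
# inline or on worker threads. Both callbacks are placeholders.
use strict;
use warnings;
use threads;

my @to_process   = map { +{ donor_unique_id => "PROJ::donor$_" } } 1..4;
my $thread_count = 2;
my $check_ref = sub { return 1 };   # 1 = something still to download, 0 = skip
my $code_ref  = sub { warn "pulling $_[0]->{donor_unique_id}\n"; return 0 };

while(@to_process) {
  my $donor = shift @to_process;
  next if($check_ref->($donor) == 0);            # nothing outstanding, skip donor
  if($thread_count > 1) {
    threads->create($code_ref, $donor);
    # once the pool is full, wait for a worker to finish before continuing
    if(threads->list(threads::all) >= $thread_count) {
      sleep 1 while(threads->list(threads::joinable) == 0);
      $_->join for threads->list(threads::joinable);
    }
  }
  else {
    $code_ref->($donor);                         # serial fallback
  }
}
# drain any remaining workers
sleep 1 while(threads->list(threads::running) > 0);
$_->join for threads->list(threads::joinable);
```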
diff --git a/c/Makefile b/c/Makefile
index c029efe..382291e 100644
--- a/c/Makefile
+++ b/c/Makefile
@@ -99,9 +99,10 @@ valgrind:
 .c.o:
 	$(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@
 
-clean:
+clean:
 	@echo clean
-	$(RM) ./*.o *~ $(BAM_STATS_TARGET) ./tests/tests_log $(TESTS) ./*.gcda ./*.gcov ./*.gcno *.gcda *.gcov *.gcno ./tests/*.gcda ./tests/*.gcov ./tests/*.gcno
+	$(RM) ./*.o *~ $(BAM_STATS_TARGET) ./tests/tests_log $(TESTS) ./*.gcda ./*.gcov ./*.gcno *.gcda *.gcov *.gcno ./tests/*.gcda ./tests/*.gcov ./tests/*.gcno
+	-rm -rf $(HTSTMP)
 
 depend: $(SRCS)
diff --git a/c/bam_access.c b/c/bam_access.c
index 0232a0f..e29c706 100644
--- a/c/bam_access.c
+++ b/c/bam_access.c
@@ -37,7 +37,7 @@ void parse_rg_line(char *tmp_line, const int idx, rg_info_t **groups,
   while(tag != NULL){
     int chk = sscanf(tag,"ID:%[^\t\n]",id);
     if(chk == 1){
-      groups[idx]->id = malloc(sizeof(char) * 1000);
+      groups[idx]->id = (char *) malloc(sizeof(char) * 1000);
       strcpy(groups[idx]->id,id);
       tag = strtok(NULL,"\t");
       continue;
@@ -111,22 +111,41 @@ rg_info_t **parse_header(bam_hdr_t *head, int *grps_size, stats_rd_t ****grp_sta
       check_mem(groups[idx]);
       char *id = malloc(sizeof(char) * 1000);
       check_mem(id);
-      char *sm =malloc(sizeof(char) * 1000);
+      id[0] = '\0';
+      char *sm = malloc(sizeof(char) * 1000);
       check_mem(sm);
+      sm[0] = '\0';
       char *pl = malloc(sizeof(char) * 1000);
       check_mem(pl);
+      pl[0] = '\0';
       char *pu = malloc(sizeof(char) * 1000);
       check_mem(pu);
+      pu[0] = '\0';
       char *lib = malloc(sizeof(char) * 1000);
       check_mem(lib);
+      lib[0] = '\0';
       char * tmp = malloc(sizeof(char) * (strlen(line)+1));
       strcpy(tmp,line);
       parse_rg_line(tmp,idx,groups,id,sm,pl,pu,lib);
-      check(id != NULL,"Error recognising ID from RG line.");
-      check(sm != NULL,"Error recognising SM from RG line.");
+      check((id != NULL),"Error recognising ID from RG line. NULL found.");
+      check((strcmp(id,"")!=0),"Error recognising ID from RG line. Empty string.");
+      check((id[0]!='\0'),"Error recognising ID from RG line. Empty string.");
+      check((sm != NULL),"Error recognising SM from RG line.");
+      if((sm[0]=='\0')){
+        groups[idx]->sample = ".";
+      }
       check(pl != NULL,"Error recognising PL from RG line.");
+      if((pl[0]=='\0')){
+        groups[idx]->platform = ".";
+      }
       check(lib != NULL,"Error recognising LB from RG line.");
+      if((lib[0]=='\0')){
+        groups[idx]->lib = ".";
+      }
       check(pu != NULL,"Error recognising PU from RG line.");
+      if((pu[0]=='\0')){
+        groups[idx]->platform_unit = ".";
+      }
       free (tmp);
       idx++;
     }//End of iteration through header lines.
@@ -188,7 +207,7 @@ rg_info_t **parse_header(bam_hdr_t *head, int *grps_size, stats_rd_t ****grp_sta
     return NULL;
 }
 
-int process_reads(htsFile *input, bam_hdr_t *head, rg_info_t **grps, int grps_size, stats_rd_t ****grp_stats){
+int process_reads(htsFile *input, bam_hdr_t *head, rg_info_t **grps, int grps_size, stats_rd_t ****grp_stats, int rna){
   assert(input != NULL);
   assert(head != NULL);
   assert(grps != NULL);
@@ -198,7 +217,7 @@ int process_reads(htsFile *input, bam_hdr_t *head, rg_info_t **grps, int grps_si
   b = bam_init1();
   int ret;
   while((ret = sam_read1(input, head, b)) >= 0){
-    if (b->core.flag & BAM_FSECONDARY) continue; //skip secondary hits so no double counts
+    if (b->core.flag & BAM_FSECONDARY && rna == 0) continue; //skip secondary hits so no double counts
     if (b->core.flag & BAM_FQCFAIL) continue; // skip vendor fail as generally aren't considered
     if (b->core.flag & BAM_FSUPPLEMENTARY) continue; // skip supplimentary
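The parse_header changes above boil down to one rule: an RG `ID` is still mandatory (an empty string is now caught explicitly), while missing optional tags (`SM`/`PL`/`LB`/`PU`) fall back to `.` rather than being left unset. A hypothetical Perl rendering of that rule, for illustration only:

```perl
# Hypothetical illustration of the RG-defaulting rule: ID is mandatory,
# the optional tags fall back to '.' when the @RG line does not carry them.
use strict;
use warnings;

sub rg_fields {
  my ($rg_line) = @_;
  my %tags = map { split /:/, $_, 2 } grep { /:/ } split /\t/, $rg_line;
  die "Error recognising ID from RG line.\n" unless defined $tags{ID} && length $tags{ID};
  return { id            => $tags{ID},
           sample        => $tags{SM} // '.',
           platform      => $tags{PL} // '.',
           platform_unit => $tags{PU} // '.',
           lib           => $tags{LB} // '.' };
}

my $grp = rg_fields("\@RG\tID:1\tSM:tumour");   # no PL/PU/LB in this header
printf "%s %s %s\n", $grp->{id}, $grp->{sample}, $grp->{platform};   # 1 tumour .
```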
diff --git a/c/bam_access.h b/c/bam_access.h
index ca2a57b..3dbccc9 100644
--- a/c/bam_access.h
+++ b/c/bam_access.h
@@ -54,7 +54,7 @@ typedef struct{
 
 rg_info_t **parse_header(bam_hdr_t *head, int *grps_size, stats_rd_t ****grp_stats);
 
-int process_reads(htsFile *input, bam_hdr_t *head, rg_info_t **grps, int grps_size, stats_rd_t ****grp_stats);
+int process_reads(htsFile *input, bam_hdr_t *head, rg_info_t **grps, int grps_size, stats_rd_t ****grp_stats, int rna);
 
 uint64_t get_mapped_base_count_from_cigar(bam1_t *b);
diff --git a/c/bam_stats.c b/c/bam_stats.c
index 257c7cc..6e96ff9 100644
--- a/c/bam_stats.c
+++ b/c/bam_stats.c
@@ -31,9 +31,10 @@
 
 #include "khash.h"
 
-static char *input_file;
+static char *input_file = NULL;
 static char *output_file;
 static char *ref_file;
+static int rna = 0;
 int grps_size = 0;
 stats_rd_t*** grp_stats;
 static char *bas_header = "bam_filename\tsample\tplatform\tplatform_unit\tlibrary\treadgroup\tread_length_r1\tread_length_r2\t#_mapped_bases\t#_mapped_bases_r1\t#_mapped_bases_r2\t#_divergent_bases\t#_divergent_bases_r1\t#_divergent_bases_r2\t#_total_reads\t#_total_reads_r1\t#_total_reads_r2\t#_mapped_reads\t#_mapped_reads_r1\t#_mapped_reads_r2\t#_mapped_reads_properly_paired\t#_gc_bases_r1\t#_gc_bases_r2\tmean_insert_size\tinsert_size_sd\tmedian_insert_size\t#_duplicate_reads\n";
@@ -62,7 +63,8 @@ void print_usage (int exit_code){
 	printf ("Optional:\n");
 	printf ("-r  --ref-file File path to reference index (.fai) file.\n");
 	printf ("               NB. If cram format is supplied via -b and the reference listed in the cram header can't be found bam_stats may fail to work correctly.\n");
-	printf ("-p  --plots    Folder to contain quality score plots.\n\n");
+	printf ("-a  --rna      Uses the RNA method of calculating insert size (ignores anything outside ± ('sd'*standard_dev) of the mean in calculating a new mean)\n");
+	printf ("Other:\n");
 	printf ("-h  --help     Display this usage information.\n");
 	printf ("-v  --version  Prints the version number.\n\n");
@@ -80,7 +82,7 @@ void options(int argc, char *argv[]){
             	{"input",required_argument,0,'i'},
             	{"ref-file",required_argument,0,'r'},
             	{"output",required_argument,0,'o'},
-            	{"plots",required_argument,0,'p'},
+            	{"rna",no_argument,0, 'a'},
             	{ NULL, 0, NULL, 0}
    }; //End of declaring opts
@@ -89,7 +91,7 @@ void options(int argc, char *argv[]){
    int iarg = 0;
 
    //Iterate through options
-   while((iarg = getopt_long(argc, argv, "i:o:r:pvh", long_opts, &index)) != -1){
+   while((iarg = getopt_long(argc, argv, "i:o:r:vha", long_opts, &index)) != -1){
     switch(iarg){
       case 'i':
         input_file = optarg;
@@ -102,12 +104,11 @@ void options(int argc, char *argv[]){
       case 'r':
         ref_file = optarg;
         break;
-
-      case 'p':
-        print_usage(1);
-        //plots = optarg;
-        break;
-
+
+      case 'a':
+        rna = 1;
+        break;
+
       case 'h':
         print_usage(0);
         break;
@@ -128,9 +129,17 @@ void options(int argc, char *argv[]){
   }//End of iteration through options
 
   //Do some checking to ensure required arguments were passed and are accessible files
-  if(check_exist(input_file) != 1){
-    printf("Input file (-i) %s does not exist.\n",input_file);
-    print_usage(1);
+  if (input_file==NULL || strcmp(input_file,"/dev/stdin")==0) {
+    input_file = "-";   // htslib recognises this as a special case
+  }
+  if (strcmp(input_file,"-") != 0) {
+    if(check_exist(input_file) != 1){
+      printf("Input file (-i) %s does not exist.\n",input_file);
+      print_usage(1);
+    }
+  }
+  if (output_file==NULL || strcmp(output_file,"/dev/stdout")==0) {
+    output_file = "-";   // we recognise this as a special case
   }
   if(ref_file){
     if(check_exist(ref_file) != 1){
@@ -154,16 +163,6 @@ int calculate_mean_sd_median_insert_size(khash_t(ins) *inserts,double *mean, dou
       tt_mean += val;
     });
 
-  /*
-  int i = 0;
-  for(i=0;i<200000;i++){
-    if(inserts[i]>0){
-      pp_mean += (i+1) * inserts[i];
-      tt_mean += inserts[i];
-    }
-  }
-  */
-
   if(tt_mean){//Calculate mean , median, sd
     *mean = (double) ((double)pp_mean/(double)tt_mean);
@@ -180,18 +179,6 @@ int calculate_mean_sd_median_insert_size(khash_t(ins) *inserts,double *mean, dou
         prev_insert = key;
       });
 
-    /*
-    int j=0;
-
-    for(j=0;j<200000;j++){
-      if(inserts[j]>0){
-        insert = j+1;
-        running_total += inserts[j];
-        if(running_total >= midpoint) break ;
-        prev_insert = j+1;
-      }
-    }*/
-
     if(tt_mean %2 == 0 && ( running_total - midpoint2 >= insert )){
       //warn "Thinks is even AND split between bins ";
       *median = (((double)insert + (double)prev_insert) / (double)2);
@@ -210,16 +197,7 @@ int calculate_mean_sd_median_insert_size(khash_t(ins) *inserts,double *mean, dou
         tt_sd += val;
       });
 
-    /*
-    int k=0;
-    for(k=0;k<200000;k++){
-      if(inserts[k]>0){
-        double diff = (double)(k+1) - (*mean);
-        pp_sd += (diff * diff) * inserts[k];
-        tt_sd += inserts[k];
-      }
-    }*/
-    kh_destroy(ins, inserts);
+
     if(tt_sd){
       double variance = fabs((double)((double)pp_sd / (double)tt_sd));
@@ -227,13 +205,20 @@ int calculate_mean_sd_median_insert_size(khash_t(ins) *inserts,double *mean, dou
     }else{
       *sd = 0;
     }
+
+    kh_destroy(ins, inserts);
 
   } //End of if we have data to calculate from.
 
   return 0;
 }
 
 int print_results(rg_info_t **grps){
-  FILE *out = fopen(output_file,"w");
+  FILE *out;
+  if (strcmp(output_file,"-")==0) {
+    out = stdout;
+  } else {
+    out = fopen(output_file,"w");
+  }
   check(out != NULL,"Error trying to open output file %s for writing.",output_file);
 
   int chk = fprintf(out,"%s",bas_header);
@@ -327,11 +312,11 @@ int print_results(rg_info_t **grps){
       fflush(out);
     }
 
-  fclose(out);
+  if (out != stdout) fclose(out);
   return 0;
 
 error:
-  if(out) fclose(out);
+  if(out && out != stdout) fclose(out);
   return -1;
 }
@@ -356,7 +341,7 @@ int main(int argc, char *argv[]){
 	  check(grps != NULL, "Error fetching read groups from header.");
 
 	  //Process every read in bam file.
-	  int check = process_reads(input,head,grps, grps_size, &grp_stats);
+	  int check = process_reads(input,head,grps, grps_size, &grp_stats, rna);
 	  check(check==0,"Error processing reads in bam file.");
 
 	  int res = print_results(grps);
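With `-i`/`-o` now accepting `-` (and unset values defaulting to it), bam_stats can sit inside a pipeline without temporary files. A caller-side sketch, assuming bam_stats and samtools are on PATH; the exact pipeline is illustrative, not taken from this repository:

```perl
# Caller-side sketch of the new streaming behaviour: with -i - and -o - the
# tool reads the alignment stream from stdin and writes the .bas table to
# stdout. The surrounding pipeline is an assumption for demonstration.
use strict;
use warnings;

my $pipeline = 'samtools view -u in.bam | bam_stats -i - -o -';
open my $stats, '-|', $pipeline or die "Unable to start pipeline: $!";
while (my $row = <$stats>) {
  print $row;                 # header line plus one row per read group
}
close $stats or die "Pipeline exited non-zero: $?";
```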
diff --git a/docs.tar.gz b/docs.tar.gz
index c0f47f4..39837c4 100644
Binary files a/docs.tar.gz and b/docs.tar.gz differ
diff --git a/lib/PCAP.pm b/lib/PCAP.pm
index d451883..4101943 100644
--- a/lib/PCAP.pm
+++ b/lib/PCAP.pm
@@ -24,7 +24,7 @@ use strict;
 use Const::Fast qw(const);
 use base 'Exporter';
 
-our $VERSION = '1.10.0';
+our $VERSION = '1.11.0';
 our @EXPORT = qw($VERSION);
 
 const my $LICENSE =>
@@ -64,14 +64,15 @@ const my %UPGRADE_PATH => ( '0.1.0' => 'biobambam,bwa,samtools',
                             '1.6.2' => 'biobambam,bwa',
                             '1.6.3' => 'biobambam,bwa',
                             '1.7.0' => 'biobambam,bwa',
-                            '1.7.1' => 'bwa',
-                            '1.8.0' => '',
-                            '1.8.1' => '',
-                            '1.8.2' => '',
-                            '1.9.0' => '',
-                            '1.9.1' => '',
-                            '1.9.4' => '',
-                            '1.10.0' => '',
+                            '1.7.1' => 'biobambam,bwa',
+                            '1.8.0' => 'biobambam',
+                            '1.8.1' => 'biobambam',
+                            '1.8.2' => 'biobambam',
+                            '1.9.0' => 'biobambam',
+                            '1.9.1' => 'biobambam',
+                            '1.9.4' => 'biobambam',
+                            '1.10.0' => 'biobambam',
+                            '1.11.0' => 'biobambam',
                           );
 
 sub license {
diff --git a/lib/PCAP/Bam.pm b/lib/PCAP/Bam.pm
index e2218b7..457c0e1 100644
--- a/lib/PCAP/Bam.pm
+++ b/lib/PCAP/Bam.pm
@@ -107,7 +107,7 @@ sub merge_and_mark_dup {
   $helper_threads = 1 if($helper_threads < 1);
   # uncoverable branch true
   # uncoverable branch false
-  my $command = which('bammarkduplicates') || die "Unable to find 'bammarkduplicates' in path";
+  my $command = which('bammarkduplicates2') || die "Unable to find 'bammarkduplicates2' in path";
   $command .= sprintf $BAMBAM_DUP, $marked,
                                    $met,
                                    File::Spec->catfile($tmp, 'biormdup'),
diff --git a/lib/PCAP/Bwa.pm b/lib/PCAP/Bwa.pm
index c160081..2083863 100644
--- a/lib/PCAP/Bwa.pm
+++ b/lib/PCAP/Bwa.pm
@@ -222,7 +222,10 @@ sub bwa_mem {
     else {
       my ($rg) = $split =~ m|/split/[[:digit:]]+/(.+)_i.fq_[[:digit:]]+.gz$|;
       ($rg_line, undef) = PCAP::Bam::rg_line_for_output($input->in, $options->{'sample'}, undef, $rg);
-      unless($rg_line) {
+      if($rg_line) {
+        $rg_line =~ s/('+)/'"$1"'/g;
+      }
+      else {
         $rg_line = sprintf $FALSE_RG, $split_element, $options->{'sample'};
       }
       $rg_line = q{'}.$rg_line.q{'};
@@ -244,12 +247,14 @@ sub bwa_mem {
       # uncoverable branch true
       # uncoverable branch false
       if($input->paired_fq) {
+        $split =~ s/'/\\'/g;
        my $split2 = $split;
        $split2 =~ s/pairedfq1(\.[[:digit:]]+)/pairedfq2$1/;
        $bwa .= ' '.$split;
        $bwa .= ' '.$split2;
       }
       else {
+        $split =~ s/'/\\'/g;
        $bwa .= ' '.$split;
       }
       $command .= $bwa;
@@ -261,6 +266,7 @@ sub bwa_mem {
 
   my $sorted_bam_stub = $split;
   $sorted_bam_stub =~ s|/split/([[:digit:]]+)/(.+)$|/sorted/$1_$2|;
+  $sorted_bam_stub =~ s/\\'/-/g;
 
   my $ref = exists $options->{'decomp_ref'} ? $options->{'decomp_ref'} : $options->{'reference'};
   my $sort = which('bamsort') || die "Unable to find 'bamsort' in path\n";
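The substitution `s/('+)/'"$1"'/g` above prepares the RG line for being wrapped in single quotes on the shell command line: each embedded quote closes the string, is emitted double-quoted, then reopens it. A minimal demonstration:

```perl
# The RG line later gets wrapped in single quotes for the shell, so each
# embedded ' must close the quote, appear double-quoted, then reopen it.
use strict;
use warnings;

my $rg_line = "\@RG\tID:1\tSM:O'Brien";        # sample name containing a quote
(my $escaped = $rg_line) =~ s/('+)/'"$1"'/g;   # same substitution as in bwa_mem
my $shell_arg = q{'}.$escaped.q{'};
print "$shell_arg\n";   # ...SM:O'"'"'Brien -- survives shell single-quoting intact
```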
diff --git a/lib/PCAP/Threaded.pm b/lib/PCAP/Threaded.pm
index 0189954..a7b03e0 100644
--- a/lib/PCAP/Threaded.pm
+++ b/lib/PCAP/Threaded.pm
@@ -30,7 +30,7 @@ use Carp qw( croak );
 use Config; # so we can see if threads are enabled
 use File::Spec;
 use File::Path qw(make_path);
-use Try::Tiny qw(try catch);
+use Try::Tiny qw(try catch finally);
 use Capture::Tiny qw(capture);
 use IO::File;
 
@@ -208,6 +208,18 @@ sub external_process_handler {
       }
     } catch {
       die $_ if($_);
+    }
+    finally {
+      if($out_fh) {
+        $out_fh->flush;
+        $out_fh->close;
+        undef $out_fh;
+      }
+      if($err_fh) {
+        $err_fh->flush;
+        $err_fh->close;
+        undef $err_fh;
+      }
     };
   }
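Because the `catch` block rethrows, cleanup placed after the try/catch would be skipped when the external command fails; a `finally` block runs on both paths, which is why the handle cleanup lives there. A minimal standalone illustration (hypothetical log path):

```perl
# Why the cleanup moved into finally: catch rethrows, so statements after the
# try/catch never run on failure, but finally always does.
use strict;
use warnings;
use IO::Handle;
use Try::Tiny qw(try catch finally);

my $out_fh;
try {
  open $out_fh, '>', '/tmp/demo.out.log' or die "open: $!";  # hypothetical path
  die "external command failed\n";      # stand-in for a failing system() call
} catch {
  die $_ if($_);                        # rethrow, as external_process_handler does
} finally {
  if($out_fh) {                         # still runs despite the rethrow
    $out_fh->flush;
    $out_fh->close;
    undef $out_fh;
  }
};
# the script then exits with the rethrown error, but the log handle is closed
```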
diff --git a/prerelease.sh b/prerelease.sh
index f7c1c61..f0b0000 100755
--- a/prerelease.sh
+++ b/prerelease.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 
 set -eu # exit on first error or undefined value in subtitution
+set -o pipefail
 
 # get current directory
 INIT_DIR=`pwd`
@@ -25,10 +26,6 @@ export HARNESS_PERL_SWITCHES=-MDevel::Cover=-db,reports,-ignore,'t/.*\.t'
 rm -rf docs
 mkdir -p docs/reports_text
 prove --nocolor -I ./lib | sed 's/^/  /' # indent output of prove
-if [[ $? -ne 0 ]] ; then
-  echo "\n\tERROR: TESTS FAILED\n"
-  exit 1
-fi
 
 echo '### Generating test/pod coverage reports ###'
 # removed 'condition' from coverage as '||' 'or' doesn't work properly
diff --git a/setup.sh b/setup.sh
index 7e4a0e6..986dbce 100755
--- a/setup.sh
+++ b/setup.sh
@@ -11,7 +11,7 @@ SOURCE_HTSLIB="https://github.com/samtools/htslib/archive/1.2.1.tar.gz"
 SOURCE_JKENT_BIN="https://github.com/ENCODE-DCC/kentUtils/raw/master/bin/linux.x86_64"
 
 # for biobambam
-SOURCE_BBB_BIN_DIST="https://github.com/gt1/biobambam/releases/download/0.0.191-release-20150401083643/biobambam-0.0.191-release-20150401083643-x86_64-etch-linux-gnu.tar.gz"
+SOURCE_BBB_BIN_DIST="https://github.com/gt1/biobambam2/releases/download/2.0.25-release-20151105154334/biobambam2-2.0.25-release-20151105154334-x86_64-etch-linux-gnu.tar.gz"
 
 done_message () {
     if [ $? -eq 0 ]; then
@@ -210,10 +210,9 @@ if [[ ",$COMPILE," == *,biobambam,* ]] ; then
     cd $SETUP_DIR
     mkdir -p biobambam
     get_distro "biobambam" $SOURCE_BBB_BIN_DIST
-    mkdir -p $INST_PATH/bin $INST_PATH/include $INST_PATH/lib $INST_PATH/share
-    rm -f biobambam/bin/curl* # breaks OS installs
+    mkdir -p $INST_PATH/bin $INST_PATH/etc $INST_PATH/lib $INST_PATH/share
     cp -r biobambam/bin/* $INST_PATH/bin/.
-    cp -r biobambam/include/* $INST_PATH/include/.
+    cp -r biobambam/etc/* $INST_PATH/etc/.
     cp -r biobambam/lib/* $INST_PATH/lib/.
     cp -r biobambam/share/* $INST_PATH/share/.
     touch $SETUP_DIR/biobambam.success
diff --git a/t/3_external_progs.t b/t/3_external_progs.t
index fa57d29..2675453 100644
--- a/t/3_external_progs.t
+++ b/t/3_external_progs.t
@@ -10,29 +10,30 @@ use List::Util qw(first);
 use Const::Fast qw(const);
 use Capture::Tiny qw(capture);
 use Data::Dumper;
+use version 0.77;
 
-const my @REQUIRED_PROGRAMS => qw(bamcollate2 bammarkduplicates bamsort bwa);
-const my $BIOBAMBAM_VERSION => ['0.0.191'];
-const my $BWA_VERSIONS => ['0.7.12'];
+const my @REQUIRED_PROGRAMS => qw(bamcollate2 bammarkduplicates2 bamsort bwa);
+const my $BIOBAMBAM2_VERSION => '2.0.25';
+const my $BWA_VERSION => '0.7.12';
 
 # can't put regex in const
 my %EXPECTED_VERSION = (
                         'bamcollate2' => {
                           'get' => q{ -h},
-                          'match' => qr/This is biobambam version ([[:digit:]\.]+)\./,
-                          'version' => $BIOBAMBAM_VERSION},
-                        'bammarkduplicates' => {
+                          'match' => qr/This is biobambam2 version ([[:digit:]\.]+)\./,
+                          'version' => version->parse($BIOBAMBAM2_VERSION)},
+                        'bammarkduplicates2' => {
                           'get' => q{ -h},
-                          'match' => qr/This is biobambam version ([[:digit:]\.]+)\./,
-                          'version' => $BIOBAMBAM_VERSION},
+                          'match' => qr/This is biobambam2 version ([[:digit:]\.]+)\./,
+                          'version' => version->parse($BIOBAMBAM2_VERSION)},
                         'bamsort' => {
                           'get' => q{ -h},
-                          'match' => qr/This is biobambam version ([[:digit:]\.]+)\./,
-                          'version' => $BIOBAMBAM_VERSION},
+                          'match' => qr/This is biobambam2 version ([[:digit:]\.]+)\./,
+                          'version' => version->parse($BIOBAMBAM2_VERSION)},
                         'bwa' => {
                           'get' => q{},
                           'match' => qr/Version: ([[:digit:]\.]+[[:alpha:]]?)/, # we don't care about the revision number
-                          'version' => $BWA_VERSIONS},
+                          'version' => version->parse($BWA_VERSION)},
                        );
 
 subtest 'External programs exist on PATH' => sub {
@@ -50,8 +51,9 @@ subtest 'External programs have expected version' => sub {
     my ($stdout, $stderr, $exit) = capture{ system($command); };
     my $reg = $details->{'match'};
     my ($version) = $stderr =~ /$reg/m;
-    my $found = first {$version eq $_} @{$details->{'version'}};
-    ok($found, sprintf 'Expect version %s for %s', (join q{|}, @{$details->{'version'}}), $prog);
+    version->parse($version);
+
+    ok(version->parse($version) >= $details->{'version'}, sprintf 'Expect minimum version of %s for %s', $details->{'version'}, $prog);
   }
 };
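The test now asserts a minimum version rather than an exact match, using `version` objects so that multi-digit components compare numerically. A quick illustration of why that matters:

```perl
# version objects compare component-wise, so 2.0.9 is older than 2.0.25;
# a plain string comparison would get this wrong.
use strict;
use warnings;
use version 0.77;

my $minimum = version->parse('2.0.25');
for my $reported (qw(2.0.9 2.0.25 2.0.30)) {
  printf "%-7s %s\n", $reported,
    version->parse($reported) >= $minimum ? 'ok' : 'too old';
}
```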