Skip to content

Commit

Permalink
Merge branch 'release/v1.9.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
keiranmraine committed Aug 3, 2015
2 parents 98338fd + af60d4c commit ca830ae
Show file tree
Hide file tree
Showing 11 changed files with 162 additions and 80 deletions.
4 changes: 4 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
### 1.9.0
* Fix bugs #52 and #53
* Modified bwa_mem.pl to accept multi-readgroup BAM as input

### 1.7.1
* Turns out BWA mem still requires fixmates to get proper isize distributions
* bumped biobambam to [0.0.191](https://github.com/gt1/biobambam/releases/tag/0.0.191-release-20150401083643)
Expand Down
4 changes: 2 additions & 2 deletions MYMETA.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"unknown"
],
"dynamic_config" : 0,
"generated_by" : "ExtUtils::MakeMaker version 6.68, CPAN::Meta::Converter version 2.131560",
"generated_by" : "ExtUtils::MakeMaker version 6.68, CPAN::Meta::Converter version 2.142690",
"license" : [
"gpl_2"
],
Expand Down Expand Up @@ -57,5 +57,5 @@
}
},
"release_status" : "stable",
"version" : "v1.8.2"
"version" : "v1.9.0"
}
52 changes: 26 additions & 26 deletions MYMETA.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,40 +3,40 @@ abstract: unknown
author:
- unknown
build_requires:
ExtUtils::MakeMaker: 0
ExtUtils::MakeMaker: '0'
configure_requires:
ExtUtils::MakeMaker: 0
ExtUtils::MakeMaker: '0'
dynamic_config: 0
generated_by: 'ExtUtils::MakeMaker version 6.68, CPAN::Meta::Converter version 2.131560'
generated_by: 'ExtUtils::MakeMaker version 6.68, CPAN::Meta::Converter version 2.142690'
license: gpl
meta-spec:
url: http://module-build.sourceforge.net/META-spec-v1.4.html
version: 1.4
version: '1.4'
name: PCAP
no_index:
directory:
- t
- inc
requires:
Bio::DB::Sam: 1.39
Bio::Root::Version: 1.006923
Capture::Tiny: 0.24
Config::IniFiles: 2.83
Const::Fast: 0.014
Data::UUID: 1.219
Devel::Cover: 1.09
File::Which: 0.05
GD: 2.52
IPC::System::Simple: 1.25
List::Util: 1.38
Math::Gradient: 0.04
Module::Build: 0.42
Pod::Coverage: 0.23
Proc::PID::File: 1.27
Proc::ProcessTable: 0.5
Sub::Exporter::Progressive: 0.001011
Term::UI: 0.42
Test::Fatal: 0.013
Try::Tiny: 0.19
XML::Simple: 2.2
version: v1.8.2
Bio::DB::Sam: '1.39'
Bio::Root::Version: '1.006923'
Capture::Tiny: '0.24'
Config::IniFiles: '2.83'
Const::Fast: '0.014'
Data::UUID: '1.219'
Devel::Cover: '1.09'
File::Which: '0.05'
GD: '2.52'
IPC::System::Simple: '1.25'
List::Util: '1.38'
Math::Gradient: '0.04'
Module::Build: '0.42'
Pod::Coverage: '0.23'
Proc::PID::File: '1.27'
Proc::ProcessTable: '0.5'
Sub::Exporter::Progressive: '0.001011'
Term::UI: '0.42'
Test::Fatal: '0.013'
Try::Tiny: '0.19'
XML::Simple: '2.2'
version: v1.9.0
7 changes: 5 additions & 2 deletions bin/bwa_mem.pl
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
use autodie qw(:all);
use FindBin qw($Bin);
use lib "$Bin/../lib";
use Cwd qw(abs_path);

use File::Path qw(remove_tree make_path);
use Getopt::Long;
Expand Down Expand Up @@ -51,7 +52,7 @@
my $options = setup();

my $threads = PCAP::Threaded->new($options->{'threads'});
&PCAP::Threaded::disable_out_err if(exists $options->{'index'});
&PCAP::Threaded::disable_out_err if(!exists $options->{'index'} && $options->{'threads'} == 1);

# register processes
$threads->add_function('split', \&PCAP::Bwa::split_in);
Expand Down Expand Up @@ -130,7 +131,9 @@ sub setup {
make_path($logs) unless(-d $logs);

$opts{'tmp'} = $tmpdir;
$opts{'raw_files'} = \@ARGV;
for(@ARGV) {
push @{$opts{'raw_files'}}, abs_path($_);
}

my $max_split = PCAP::Bwa::mem_prepare(\%opts);

Expand Down
99 changes: 75 additions & 24 deletions bin/gnos_pull.pl
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,10 @@
use Const::Fast qw(const);
use File::Copy qw(move);
use File::Fetch;
use File::Path qw(make_path);
use File::Path qw(make_path remove_tree);
use File::Spec;
use File::Which qw(which);
use IO::File;
use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
use JSON qw(decode_json);
use List::Util qw(first any);
use Proc::PID::File;
Expand All @@ -50,7 +49,7 @@
const my @ANALYSIS_TYPES => (qw(ALIGNMENTS CALLS));
const my @AVAILABLE_COMPOSITE_FILTERS => (qw(max_dataset_GB multi_tumour sanger_version jamboree_approved manual_donor_blacklist));
const my $DEFAULT_URL => 'http://pancancer.info/gnos_metadata/latest';
const my $GTDL_COMMAND => '%s%s --max-children 4 --rate-limit 200 -v -c %s -d %scghub/data/analysis/download/%s -p %s';
const my $GTDL_COMMAND => '%s%s --max-children 3 --rate-limit 200 -v -c %s -d %scghub/data/analysis/download/%s -p %s';

{
my $options = option_builder();
Expand Down Expand Up @@ -191,6 +190,24 @@ sub pull_data {
}
}

sub check_or_create_symlink {
my ($source, $target) = @_;
unless(-e $source) {
die "FATAL: Source file doesn't exist: $source\n";
}
if(-l $target) {
my $existing_target = readlink $target;
if($source ne $existing_target) {
unlink $target;
symlink($source, $target);
}
}
else {
symlink($source, $target);
}
return 1;
}

sub pull_alignments {
my ($options, $donor, $outbase, $donor_base) = @_;

Expand Down Expand Up @@ -228,11 +245,17 @@ sub pull_bam {
my $orig_bam = $f_base.'/'.$bam_data->{'aligned_bam'}->{'bam_file_name'};
my $sym_bam = $sample_base.'/'.$bam_data->{'aliquot_id'}.'.bam';

return if(-e $success);
if(-e $success) {
check_or_create_symlink($orig_bam, $sym_bam);
check_or_create_symlink($orig_bam.'.bai', $sym_bam.'.bai');
create_bas($repo, $gnos_id, $sym_bam);
return;
}
return if($options->{'symlinks'});

my $out_file = "$f_base.out.log";
my $err_file = "$f_base.err.log";
$download .= "> $f_base.out.log";
$download .= " > $f_base.out.log";
$download = "($download) >& $err_file";
warn "Executing: $download\n";

Expand All @@ -248,27 +271,45 @@ sub pull_bam {
unlink $out_file;
unlink $err_file;

symlink($orig_bam, $sym_bam) unless(-e $sym_bam);
symlink($orig_bam.'.bai', $sym_bam.'.bai') unless(-e $sym_bam.'.bai');

# pull the bas file, done here to handle back fill of this data
my $get_bas = sprintf '%s %s/xml_to_bas.pl -d %scghub/metadata/analysisFull/%s -o %s -b %s',
$^X,
$Bin,
$repo,
$gnos_id,
$sym_bam.'.bas',
$sym_bam;
warn "Executing: $get_bas\n";
my ($stdout, $stderr, $exit_c) = capture { system($get_bas); };

die "A problem occured while executing: $get_bas\n\nSTDOUT:\n$stdout\n\nSTDERR:$stderr\n" if($exit_c);
check_or_create_symlink($orig_bam, $sym_bam);
check_or_create_symlink($orig_bam.'.bai', $sym_bam.'.bai');
create_bas($repo, $gnos_id, $sym_bam);

# touch a success file in the output loc
open my $S, '>', $success; close $S;
return 1;
}

sub create_bas {
my ($repo, $gnos_id, $sym_bam) = @_;
my $bas_file = $sym_bam.'.bas';
unlink $bas_file if(-l $bas_file);
return if(-s $bas_file);
# pull the bas file, done here to handle back fill of this data
my $get_bas = sprintf '%s %s/xml_to_bas.pl -d %scghub/metadata/analysisFull/%s -o %s -b %s',
$^X,
$Bin,
$repo,
$gnos_id,
$bas_file,
$sym_bam;
my $success = 0;
for(0..5) {
warn "Executing: $get_bas\n";
my ($stdout, $stderr, $exit_c) = capture { system($get_bas); };
if($exit_c) {
warn "A problem occured while executing: $get_bas\n\nSTDOUT:\n$stdout\n\nSTDERR:$stderr\n...Retry\n";
sleep 30;
}
else {
$success++;
last;
}
}
die "Failed after multiple attempts, aborting bas generation using $get_bas" unless($success);
return 1;
}

sub pull_calls {
my ($options, $donor, $outbase, $donor_base) = @_;
my @callers = @{$donor->{'flags'}->{'variant_calling_performed'}};
Expand Down Expand Up @@ -296,7 +337,12 @@ sub pull_calls {
$outbase;
my $f_base = $outbase.'/'.$gnos_id;
my $success = $f_base.'/.success.t';
next if(-e $success);
if(-e $success) {
make_path($donor_base) unless(-e $donor_base);
check_or_create_symlink($f_base, "$donor_base/$caller");
next;
}
next if($options->{'symlinks'});

my $err_file = "$f_base.err.log";
my $out_fh = IO::File->new("$f_base.out.log", "w+");
Expand All @@ -315,7 +361,7 @@ sub pull_calls {

# now build symlinks
make_path($donor_base) unless(-e $donor_base);
symlink($f_base, "$donor_base/$caller") unless(-e "$donor_base/$caller");
check_or_create_symlink($f_base, "$donor_base/$caller");

# touch a success file in the output loc
open my $S, '>', $success; close $S;
Expand Down Expand Up @@ -356,7 +402,9 @@ sub load_data {
my @to_process;
my %project_dist;
my @filter_keys = keys %{$options->{'FILTERS'}};
my $z = IO::Uncompress::Gunzip->new($gz_manifest, MultiStream => 1) or die "IO::Uncompress::Gunzip failed: $GunzipError\n";
my $command = sprintf 'gunzip -c %s', $gz_manifest;
my ($pid, $z);
$pid = open $z, q{-|}, $command or croak 'Could not fork: '.$!;
DONOR: while(my $jsonl = <$z>) {
my $donor = decode_json($jsonl);

Expand Down Expand Up @@ -489,7 +537,7 @@ sub manifest {
make_path($options->{'outdir'}) unless(-e $options->{'outdir'});
$ff = File::Fetch->new(uri => $to_get);
$where = $ff->fetch( to => $options->{'outdir'});
my $dest = $options->{'outdir'}.'/donor_p.jsonl.gz';
my $dest = $options->{'outdir'}.'/'.$options->{'analysis'}.'_donor_p.jsonl.gz';
move($where, $dest);
return $dest;
}
Expand All @@ -503,6 +551,7 @@ sub option_builder {
'h|help' => \$opts{'h'},
'm|man' => \$opts{'m'},
'i|info' => \$opts{'info'},
's|symlinks' => \$opts{'symlinks'},
'u|url=s' => \$opts{'url'},
't|threads=i' => \$opts{'threads'},
'a|analysis=s' => \$opts{'analysis'},
Expand Down Expand Up @@ -551,6 +600,8 @@ =head1 SYNOPSIS
Other options:
--symlinks (-s) Rebuild symlinks only.
--threads (-t) Number of parallel GNOS retrievals.
--url (-u) The base URL to retrieve jsonl file from
Expand Down
2 changes: 1 addition & 1 deletion c/Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
VERSION=1.8.2
VERSION=1.9.0

#Compiler
CC = gcc -O3 -DVERSION='"$(VERSION)"' -g
Expand Down
Binary file modified docs.tar.gz
Binary file not shown.
3 changes: 2 additions & 1 deletion lib/PCAP.pm
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use strict;
use Const::Fast qw(const);
use base 'Exporter';

our $VERSION = '1.8.2';
our $VERSION = '1.9.0';
our @EXPORT = qw($VERSION);

const my $LICENSE =>
Expand Down Expand Up @@ -68,6 +68,7 @@ const my %UPGRADE_PATH => ( '0.1.0' => 'biobambam,bwa,samtools',
'1.8.0' => '',
'1.8.1' => '',
'1.8.2' => '',
'1.9.0' => '',
);

sub license {
Expand Down
8 changes: 6 additions & 2 deletions lib/PCAP/Bam.pm
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,15 @@ sub new {
}

sub rg_line_for_output {
my ($bam, $sample, $uniq_id) = @_;
my ($bam, $sample, $uniq_id, $existing_rgid) = @_;
my $sam = sam_ob($bam);
my $header = $sam->header->text;
my $rg_line;
while($header =~ m/^(\@RG\t[^\n]+)/xmsg) {
my $new_rg = $1;
die "BAM file appears to contain data for multiple readgroups, not supported: \n\n$header\n" if(defined $rg_line);
my ($this_id) = $new_rg =~ m/\tID:([^\t]+)/;
next if(defined $existing_rgid && $this_id ne $existing_rgid);
die "BAM file appears to contain data for multiple readgroups, not supported unless 'existing_rgid' is found: \n\n$header\n" if(defined $rg_line);
$rg_line = $new_rg;
if($uniq_id) {
my $uuid = lc Data::UUID->new->create_str;
Expand Down Expand Up @@ -135,6 +137,7 @@ sub bam_stats {
my $tmp = $options->{'tmp'};
my $bam = File::Spec->catdir($options->{'outdir'}, $options->{'sample'}).'.bam';;
my $bas = "$bam.bas";
return $bas if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), 0);
my $command = _which('bam_stats') || die "Unable to find 'bam_stats' in path";
$command .= sprintf $BAM_STATS, $bam, $bas;
if(exists $options->{'charts'} && defined $options->{'charts'}) {
Expand All @@ -143,6 +146,7 @@ sub bam_stats {
$command .= ' -p '.$chart_dir;
}
PCAP::Threaded::external_process_handler(File::Spec->catdir($tmp, 'logs'), $command, 0);
PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), 0);
return $bas;
}

Expand Down
Loading

0 comments on commit ca830ae

Please sign in to comment.