diff --git a/perl/MYMETA.json b/perl/MYMETA.json index 19043ba..d5e5979 100644 --- a/perl/MYMETA.json +++ b/perl/MYMETA.json @@ -43,5 +43,5 @@ } }, "release_status" : "stable", - "version" : "v2.0.1" + "version" : "v2.1.0" } diff --git a/perl/MYMETA.yml b/perl/MYMETA.yml index 0a89029..3f8ad2c 100644 --- a/perl/MYMETA.yml +++ b/perl/MYMETA.yml @@ -25,4 +25,4 @@ requires: Pod::Coverage: '0.23' Template: '2.26' Test::Fatal: '0.013' -version: v2.0.1 +version: v2.1.0 diff --git a/perl/bin/brass-assemble b/perl/bin/brass-assemble index 0a17107..a19413c 100755 --- a/perl/bin/brass-assemble +++ b/perl/bin/brass-assemble @@ -34,6 +34,7 @@ use File::Temp; use Getopt::Long qw(:config bundling no_ignore_case); use Bio::DB::Sam; +use Data::Dumper; use Graph; use Graph::Reader::Velvet; @@ -49,6 +50,7 @@ $0 =~ s{.*/}{}; my %output_filename; my $output_format = 'tab'; my $basedir = "working"; +my $extreme_bed; my $verbose = 0; my $clean = 0; my $genome_fa; @@ -131,8 +133,6 @@ $bedpe = open_output($output_filename{bedpe}) if exists $output_filename{bedpe}; $tab = open_output($output_filename{tab}) if exists $output_filename{tab}; die "$0: VCF output not implemented\n" if exists $output_filename{vcf}; -my $graph_reader = Graph::Reader::Velvet->new(); - my $working_dir = File::Temp->newdir( "$basedir/tmpXXXXXX", CLEANUP => $clean ); my $counter = 0; @@ -197,6 +197,7 @@ sub assemble { my ($kept, $discarded) = $sample->write_local_reads($reads, @_); push @sample_names, $sample->sample_name(); close $reads or die "$0: closing $fname failed: $!\n"; +system("wc -l $fname"); print "Wrote $kept reads to $fname, discarded $discarded\n" if $verbose; if($kept == 0) { print "No reads found in sample $sample_names[-1] for assembly input ... skipping variant\n" if $verbose; @@ -219,6 +220,7 @@ sub assemble { print "$k: graph has $_[0] vertices; n50 $_[1], max $_[2], total $_[3]\n" if $verbose; + my $graph_reader = Graph::Reader::Velvet->new(); my $graph = $graph_reader->read_graph("$directory/LastGraph"); ## TODO there is currently a bug in velvet and I think the read contributions are suspect... diff --git a/perl/bin/brass.pl b/perl/bin/brass.pl index 7cfdc45..ef4c9e7 100755 --- a/perl/bin/brass.pl +++ b/perl/bin/brass.pl @@ -85,6 +85,7 @@ sub cleanup { my $options = shift; my $tmpdir = $options->{'tmp'}; + system("cp $tmpdir/*.brm.bam $options->{outdir}/."); move(File::Spec->catdir($tmpdir, 'logs'), File::Spec->catdir($options->{'outdir'}, 'logs')) || die $!; my $basefile = File::Spec->catfile($options->{'outdir'}, $options->{'safe_tumour_name'}.'_vs_'.$options->{'safe_normal_name'}); diff --git a/perl/bin/brassI_prep_bam.pl b/perl/bin/brassI_prep_bam.pl index b1574a9..6fda34f 100755 --- a/perl/bin/brassI_prep_bam.pl +++ b/perl/bin/brassI_prep_bam.pl @@ -2,21 +2,21 @@ ########## LICENCE ########## # Copyright (c) 2014 Genome Research Ltd. -# +# # Author: Cancer Genome Project -# +# # This file is part of BRASS. -# +# # BRASS is free software: you can redistribute it and/or modify it under # the terms of the GNU Affero General Public License as published by the Free # Software Foundation; either version 3 of the License, or (at your option) any # later version. -# +# # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more # details. -# +# # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . ########## LICENCE ########## @@ -30,7 +30,6 @@ use strict; use warnings FATAL => 'all'; -use Bio::DB::Sam; use Capture::Tiny qw { capture }; use PCAP::Bam::Bas; use List::Util qw(first); diff --git a/perl/docs.tar.gz b/perl/docs.tar.gz index b6a66f3..8d57d46 100644 Binary files a/perl/docs.tar.gz and b/perl/docs.tar.gz differ diff --git a/perl/lib/Bio/Brass.pm b/perl/lib/Bio/Brass.pm index 43eb166..5499e57 100644 --- a/perl/lib/Bio/Brass.pm +++ b/perl/lib/Bio/Brass.pm @@ -36,7 +36,7 @@ our @EXPORT = qw(find_breakpoints find_dusty_vertices is_dusty get_isolated_bp_alignment get_isolated_bp_surrounding_region $VERSION); -our $VERSION = '2.0.1'; +our $VERSION = '2.1.0'; =head1 NAME diff --git a/perl/lib/Bio/Brass/ReadSelection.pm b/perl/lib/Bio/Brass/ReadSelection.pm index 1ab82ab..90c012d 100644 --- a/perl/lib/Bio/Brass/ReadSelection.pm +++ b/perl/lib/Bio/Brass/ReadSelection.pm @@ -31,6 +31,7 @@ use List::Util qw(first); use File::Spec; use File::Temp qw(tmpnam); use Bio::Brass qw($VERSION); +use PCAP::Bam::Bas; =head1 NAME @@ -120,6 +121,7 @@ sub new { } $sample_names{$1}++ if(/\tSM:([^\t]+)/); } + $max = insert_from_bas($filename) if($max == 0); warn "Multiple sample names detected in |$filename|" if scalar keys %sample_names > 1; @@ -131,6 +133,27 @@ sub new { return bless $self, $class; } +sub insert_from_bas { + my $filename = shift; + my $max_mi = 0; + my $bas = "$filename.bas"; + if(-e $bas && -s _) { + my $bas_ob = PCAP::Bam::Bas->new($bas); + my @groups = $bas_ob->read_groups; + for my $rg_id(@groups) { + my $mean_insert_size = $bas_ob->get($rg_id, 'mean_insert_size'); + my $insert_size_sd = $bas_ob->get($rg_id, 'insert_size_sd'); + my $mi = int( $mean_insert_size + (2 * $insert_size_sd) ); + $max_mi = $mi if($mi > $max_mi); + } + } + return $max_mi; +} + +sub maxmaxins { + return shift->{maxmaxins}; +} + sub DESTROY { my ($self) = @_; if (exists $self->{unlink_base}) { @@ -160,8 +183,8 @@ quality mapping of the single end. sub _get_reads { my ($self, $chr, $pos5, $pos3, $strand, $k, $q, $abort_reads) = @_; - my $normalpos5 = $pos5 - $self->{'maxmaxins'}; - my $normalpos3 = $pos3 + $self->{'maxmaxins'}; + my $normalpos5 = $pos5 - $self->{maxmaxins}; + my $normalpos3 = $pos3 + $self->{maxmaxins}; my $mutpos5 = ($strand eq '+')? $normalpos5 : $pos5; my $mutpos3 = ($strand eq '+')? $pos3 : $normalpos3; @@ -234,7 +257,7 @@ sub write_local_reads { my ($self, $out, $chrl, $lstart, $lend, $chrh, $hstart, $hend, undef, undef, $strandl, $strandh) = @_; - my $abort_reads = 500_000; + my $abort_reads = 20_000; my ($k_reads, $q_pairs) = ({},[]); my ($k1, $d1) = $self->_get_reads($chrl, $lstart+1, $lend, $strandl, $k_reads, $q_pairs, $abort_reads); diff --git a/perl/lib/Sanger/CGP/Brass/Implement.pm b/perl/lib/Sanger/CGP/Brass/Implement.pm index 9f4ca45..6bbdf56 100644 --- a/perl/lib/Sanger/CGP/Brass/Implement.pm +++ b/perl/lib/Sanger/CGP/Brass/Implement.pm @@ -42,10 +42,11 @@ use File::Copy qw(move); use PCAP::Threaded; use PCAP::Bam; -const my $ASSEMBLE_SPLIT => 100; +const my $ASSEMBLE_SPLIT => 30; ## input -const my $BAMCOLLATE => q{ outputformat=sam exclude=PROPER_PAIR,UNMAP,MUNMAP,SECONDARY,QCFAIL,DUP,SUPPLEMENTARY mapqthres=6 classes=F,F2 T=%s/bamcollate2_%s filename=%s}; +const my $BED_INTERSECT_V => q{ intersect -ubam -v -abam %s -b %s }; +const my $BAMCOLLATE => q{ outputformat=sam exclude=PROPER_PAIR,UNMAP,MUNMAP,SECONDARY,QCFAIL,DUP,SUPPLEMENTARY mapqthres=6 classes=F,F2 T=%s/bamcollate2_%s}; # tmpdir, iter, file const my $PREP_BAM => q{ -b %s}; # basfile[ -e exclude chrs] @@ -67,7 +68,7 @@ const my $BRASS_FILTER => q{ -seq_depth 25.1 -blat %s -ref %s -tumour %s -infile ## assemble const my $BRASS_ASSEMBLE => q{ -X -m mem -O bedpe -r %s -T %s -o %s %s %s:%s.bai %s:%s.bai}; -# genome.fa, tmp, output.tab, groups, tumourbam, tumourbam, normalbam, normalbam +# extreme depth, genome.fa, tmp, output.tab, groups, tumourbam, tumourbam, normalbam, normalbam ## grass const my $GRASS => ' -genome_cache %s -ref %s -species %s -assembly %s -platform %s -protocol %s -tumour %s -normal %s -file %s'; @@ -93,8 +94,11 @@ sub input { my $outbam = File::Spec->catfile($tmp, sanitised_sample_from_bam($input)); $outbam .= '.brm.bam'; - my $command = _which('bamcollate2'); - $command .= sprintf $BAMCOLLATE, $tmp, $index, $input; + my $command = _which('bedtools'); + $command .= sprintf $BED_INTERSECT_V, $input, $options->{'depth'}; + $command .= ' | '; + $command .= _which('bamcollate2'); + $command .= sprintf $BAMCOLLATE, $tmp, $index; $command .= ' | '; $command .= "$^X "; $command .= _which('brassI_prep_bam.pl'); @@ -177,14 +181,13 @@ sub split_filtered { my $tmp = $options->{'tmp'}; return 1 if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), 0); - my $groups = File::Spec->catfile($options->{'outdir'}, $options->{'safe_tumour_name'}.'_vs_'.$options->{'safe_normal_name'}.'.groups.filtered.bedpe'); my $split_dir = File::Spec->catdir($tmp, 'split'); remove_tree($split_dir) if(-d $split_dir); make_path($split_dir); - my $command = sprintf 'split --suffix-length=6 --numeric-suffixes --verbose --lines=%s %s %s/split.', - $ASSEMBLE_SPLIT, $groups, $split_dir; + my $command = sprintf q{grep -v '^#' %s | split --suffix-length=7 --numeric-suffixes --verbose --lines=%s - %s/split.}, + $groups, $ASSEMBLE_SPLIT, $split_dir; PCAP::Threaded::external_process_handler(File::Spec->catdir($tmp, 'logs'), $command, 0); @@ -206,13 +209,13 @@ sub assemble { my $split_dir = File::Spec->catdir($tmp, 'split'); my $split_file = File::Spec->catfile($split_dir, 'split.'); - $split_file .= sprintf '%06d', $index-1; + $split_file .= sprintf '%07d', $index-1; my $tmp_assemble = File::Spec->catdir($tmp, 'assemble'); make_path($tmp_assemble) unless(-e $tmp_assemble); my $assembled = File::Spec->catfile($tmp_assemble, 'bedpe.'); - $assembled .= sprintf '%06d', $index-1; + $assembled .= sprintf '%07d', $index-1; my $command = "$^X "; $command .= _which('brass-assemble'); diff --git a/setup.sh b/setup.sh index 59406ce..64973d2 100755 --- a/setup.sh +++ b/setup.sh @@ -2,27 +2,28 @@ ########## LICENCE ########## # Copyright (c) 2014 Genome Research Ltd. -# +# # Author: Cancer Genome Project -# +# # This file is part of BRASS. -# +# # BRASS is free software: you can redistribute it and/or modify it under # the terms of the GNU Affero General Public License as published by the Free # Software Foundation; either version 3 of the License, or (at your option) any # later version. -# +# # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more # details. -# +# # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . ########## LICENCE ########## SOURCE_BLAT="http://users.soe.ucsc.edu/~kent/src/blatSrc35.zip" +SOURCE_BEDTOOLS="https://github.com/arq5x/bedtools2/releases/download/v2.21.0/bedtools-2.21.0.tar.gz" done_message () { if [ $? -eq 0 ]; then @@ -92,20 +93,6 @@ echo > $INIT_DIR/setup.log echo; echo ) >>$INIT_DIR/setup.log 2>&1 -perlmods=( "Graph" ) - -set -e -for i in "${perlmods[@]}" ; do - echo -n "Installing build prerequisite $i..." - ( - set -x - $INIT_DIR/perl/bin/cpanm -v --mirror http://cpan.metacpan.org -l $INST_PATH $i - set +x - echo; echo - ) >>$INIT_DIR/setup.log 2>&1 - done_message "" "Failed during installation of $i." -done - set -eu # cleanup inst_path @@ -121,12 +108,51 @@ PERLROOT=$INST_PATH/lib/perl5 PERLARCH=$PERLROOT/$ARCHNAME export PERL5LIB="$PERLROOT:$PERLARCH" +CHK=`perl -le 'eval "require $ARGV[0]" and print $ARGV[0]->VERSION' PCAP` +if [[ "x$CHK" == "x" ]] ; then + echo "PREREQUISITE: Please install PCAP-core before proceeding: https://github.com/ICGC-TCGA-PanCancer/PCAP-core/releases" + exit 1; +fi + +perlmods=( "Graph" ) + +set -e +for i in "${perlmods[@]}" ; do + echo -n "Installing build prerequisite $i..." + ( + set -x + perl $INIT_DIR/perl/bin/cpanm -v --mirror http://cpan.metacpan.org -l $INST_PATH $i + set +x + echo; echo + ) >>$INIT_DIR/setup.log 2>&1 + done_message "" "Failed during installation of $i." +done + #create a location to build dependencies SETUP_DIR=$INIT_DIR/install_tmp mkdir -p $SETUP_DIR cd $SETUP_DIR +echo -n "Building bedtools ..." +if [ -e $SETUP_DIR/bedtools.success ]; then + echo -n " previously installed ..."; +else + cd $SETUP_DIR + ( + set -x + if [ ! -e bedtools ]; then + get_distro "bedtools2" $SOURCE_BEDTOOLS + mkdir -p bedtools2 + tar --strip-components 1 -C bedtools2 -zxf bedtools2.tar.gz + fi + make -C bedtools2 -j$CPU + cp bedtools2/bin/* $INST_PATH/bin/. + touch $SETUP_DIR/bedtools.success + )>>$INIT_DIR/setup.log 2>&1 +fi +done_message "" "Failed to build bedtools." + echo -n "Building blat ..." if [ -e $SETUP_DIR/blat.success ]; then echo -n " previously installed ..."