Skip to content

Commit

Permalink
Merge pull request #7 from cancerit/dev
Browse files Browse the repository at this point in the history
Prevent memory explosion in assembly, release 2.1.0
  • Loading branch information
keiranmraine committed Dec 16, 2014
2 parents ba38b72 + 999d672 commit 8e44b1d
Show file tree
Hide file tree
Showing 10 changed files with 97 additions and 43 deletions.
2 changes: 1 addition & 1 deletion perl/MYMETA.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,5 @@
}
},
"release_status" : "stable",
"version" : "v2.0.1"
"version" : "v2.1.0"
}
2 changes: 1 addition & 1 deletion perl/MYMETA.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ requires:
Pod::Coverage: '0.23'
Template: '2.26'
Test::Fatal: '0.013'
version: v2.0.1
version: v2.1.0
6 changes: 4 additions & 2 deletions perl/bin/brass-assemble
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ use File::Temp;
use Getopt::Long qw(:config bundling no_ignore_case);

use Bio::DB::Sam;
use Data::Dumper;

use Graph;
use Graph::Reader::Velvet;
Expand All @@ -49,6 +50,7 @@ $0 =~ s{.*/}{};
my %output_filename;
my $output_format = 'tab';
my $basedir = "working";
my $extreme_bed;
my $verbose = 0;
my $clean = 0;
my $genome_fa;
Expand Down Expand Up @@ -131,8 +133,6 @@ $bedpe = open_output($output_filename{bedpe}) if exists $output_filename{bedpe};
$tab = open_output($output_filename{tab}) if exists $output_filename{tab};
die "$0: VCF output not implemented\n" if exists $output_filename{vcf};

my $graph_reader = Graph::Reader::Velvet->new();

my $working_dir = File::Temp->newdir( "$basedir/tmpXXXXXX", CLEANUP => $clean );

my $counter = 0;
Expand Down Expand Up @@ -197,6 +197,7 @@ sub assemble {
my ($kept, $discarded) = $sample->write_local_reads($reads, @_);
push @sample_names, $sample->sample_name();
close $reads or die "$0: closing $fname failed: $!\n";
system("wc -l $fname");
print "Wrote $kept reads to $fname, discarded $discarded\n" if $verbose;
if($kept == 0) {
print "No reads found in sample $sample_names[-1] for assembly input ... skipping variant\n" if $verbose;
Expand All @@ -219,6 +220,7 @@ sub assemble {
print "$k: graph has $_[0] vertices; n50 $_[1], max $_[2], total $_[3]\n"
if $verbose;

my $graph_reader = Graph::Reader::Velvet->new();
my $graph = $graph_reader->read_graph("$directory/LastGraph");

## TODO there is currently a bug in velvet and I think the read contributions are suspect...
Expand Down
1 change: 1 addition & 0 deletions perl/bin/brass.pl
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
sub cleanup {
my $options = shift;
my $tmpdir = $options->{'tmp'};
system("cp $tmpdir/*.brm.bam $options->{outdir}/.");
move(File::Spec->catdir($tmpdir, 'logs'), File::Spec->catdir($options->{'outdir'}, 'logs')) || die $!;
my $basefile = File::Spec->catfile($options->{'outdir'}, $options->{'safe_tumour_name'}.'_vs_'.$options->{'safe_normal_name'});

Expand Down
11 changes: 5 additions & 6 deletions perl/bin/brassI_prep_bam.pl
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,21 @@

########## LICENCE ##########
# Copyright (c) 2014 Genome Research Ltd.
#
#
# Author: Cancer Genome Project <cgpit@sanger.ac.uk>
#
#
# This file is part of BRASS.
#
#
# BRASS is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
########## LICENCE ##########
Expand All @@ -30,7 +30,6 @@
use strict;
use warnings FATAL => 'all';

use Bio::DB::Sam;
use Capture::Tiny qw { capture };
use PCAP::Bam::Bas;
use List::Util qw(first);
Expand Down
Binary file modified perl/docs.tar.gz
Binary file not shown.
2 changes: 1 addition & 1 deletion perl/lib/Bio/Brass.pm
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ our @EXPORT = qw(find_breakpoints find_dusty_vertices
is_dusty get_isolated_bp_alignment get_isolated_bp_surrounding_region
$VERSION);

our $VERSION = '2.0.1';
our $VERSION = '2.1.0';

=head1 NAME
Expand Down
29 changes: 26 additions & 3 deletions perl/lib/Bio/Brass/ReadSelection.pm
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ use List::Util qw(first);
use File::Spec;
use File::Temp qw(tmpnam);
use Bio::Brass qw($VERSION);
use PCAP::Bam::Bas;

=head1 NAME
Expand Down Expand Up @@ -120,6 +121,7 @@ sub new {
}
$sample_names{$1}++ if(/\tSM:([^\t]+)/);
}
$max = insert_from_bas($filename) if($max == 0);

warn "Multiple sample names detected in |$filename|" if scalar keys %sample_names > 1;

Expand All @@ -131,6 +133,27 @@ sub new {
return bless $self, $class;
}

sub insert_from_bas {
my $filename = shift;
my $max_mi = 0;
my $bas = "$filename.bas";
if(-e $bas && -s _) {
my $bas_ob = PCAP::Bam::Bas->new($bas);
my @groups = $bas_ob->read_groups;
for my $rg_id(@groups) {
my $mean_insert_size = $bas_ob->get($rg_id, 'mean_insert_size');
my $insert_size_sd = $bas_ob->get($rg_id, 'insert_size_sd');
my $mi = int( $mean_insert_size + (2 * $insert_size_sd) );
$max_mi = $mi if($mi > $max_mi);
}
}
return $max_mi;
}

sub maxmaxins {
return shift->{maxmaxins};
}

sub DESTROY {
my ($self) = @_;
if (exists $self->{unlink_base}) {
Expand Down Expand Up @@ -160,8 +183,8 @@ quality mapping of the single end.
sub _get_reads {
my ($self, $chr, $pos5, $pos3, $strand, $k, $q, $abort_reads) = @_;

my $normalpos5 = $pos5 - $self->{'maxmaxins'};
my $normalpos3 = $pos3 + $self->{'maxmaxins'};
my $normalpos5 = $pos5 - $self->{maxmaxins};
my $normalpos3 = $pos3 + $self->{maxmaxins};

my $mutpos5 = ($strand eq '+')? $normalpos5 : $pos5;
my $mutpos3 = ($strand eq '+')? $pos3 : $normalpos3;
Expand Down Expand Up @@ -234,7 +257,7 @@ sub write_local_reads {
my ($self, $out, $chrl, $lstart, $lend, $chrh, $hstart, $hend,
undef, undef, $strandl, $strandh) = @_;

my $abort_reads = 500_000;
my $abort_reads = 20_000;
my ($k_reads, $q_pairs) = ({},[]);

my ($k1, $d1) = $self->_get_reads($chrl, $lstart+1, $lend, $strandl, $k_reads, $q_pairs, $abort_reads);
Expand Down
23 changes: 13 additions & 10 deletions perl/lib/Sanger/CGP/Brass/Implement.pm
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,11 @@ use File::Copy qw(move);
use PCAP::Threaded;
use PCAP::Bam;

const my $ASSEMBLE_SPLIT => 100;
const my $ASSEMBLE_SPLIT => 30;

## input
const my $BAMCOLLATE => q{ outputformat=sam exclude=PROPER_PAIR,UNMAP,MUNMAP,SECONDARY,QCFAIL,DUP,SUPPLEMENTARY mapqthres=6 classes=F,F2 T=%s/bamcollate2_%s filename=%s};
const my $BED_INTERSECT_V => q{ intersect -ubam -v -abam %s -b %s };
const my $BAMCOLLATE => q{ outputformat=sam exclude=PROPER_PAIR,UNMAP,MUNMAP,SECONDARY,QCFAIL,DUP,SUPPLEMENTARY mapqthres=6 classes=F,F2 T=%s/bamcollate2_%s};
# tmpdir, iter, file
const my $PREP_BAM => q{ -b %s};
# basfile[ -e exclude chrs]
Expand All @@ -67,7 +68,7 @@ const my $BRASS_FILTER => q{ -seq_depth 25.1 -blat %s -ref %s -tumour %s -infile

## assemble
const my $BRASS_ASSEMBLE => q{ -X -m mem -O bedpe -r %s -T %s -o %s %s %s:%s.bai %s:%s.bai};
# genome.fa, tmp, output.tab, groups, tumourbam, tumourbam, normalbam, normalbam
# extreme depth, genome.fa, tmp, output.tab, groups, tumourbam, tumourbam, normalbam, normalbam

## grass
const my $GRASS => ' -genome_cache %s -ref %s -species %s -assembly %s -platform %s -protocol %s -tumour %s -normal %s -file %s';
Expand All @@ -93,8 +94,11 @@ sub input {
my $outbam = File::Spec->catfile($tmp, sanitised_sample_from_bam($input));
$outbam .= '.brm.bam';

my $command = _which('bamcollate2');
$command .= sprintf $BAMCOLLATE, $tmp, $index, $input;
my $command = _which('bedtools');
$command .= sprintf $BED_INTERSECT_V, $input, $options->{'depth'};
$command .= ' | ';
$command .= _which('bamcollate2');
$command .= sprintf $BAMCOLLATE, $tmp, $index;
$command .= ' | ';
$command .= "$^X ";
$command .= _which('brassI_prep_bam.pl');
Expand Down Expand Up @@ -177,14 +181,13 @@ sub split_filtered {
my $tmp = $options->{'tmp'};
return 1 if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), 0);


my $groups = File::Spec->catfile($options->{'outdir'}, $options->{'safe_tumour_name'}.'_vs_'.$options->{'safe_normal_name'}.'.groups.filtered.bedpe');

my $split_dir = File::Spec->catdir($tmp, 'split');
remove_tree($split_dir) if(-d $split_dir);
make_path($split_dir);
my $command = sprintf 'split --suffix-length=6 --numeric-suffixes --verbose --lines=%s %s %s/split.',
$ASSEMBLE_SPLIT, $groups, $split_dir;
my $command = sprintf q{grep -v '^#' %s | split --suffix-length=7 --numeric-suffixes --verbose --lines=%s - %s/split.},
$groups, $ASSEMBLE_SPLIT, $split_dir;

PCAP::Threaded::external_process_handler(File::Spec->catdir($tmp, 'logs'), $command, 0);

Expand All @@ -206,13 +209,13 @@ sub assemble {

my $split_dir = File::Spec->catdir($tmp, 'split');
my $split_file = File::Spec->catfile($split_dir, 'split.');
$split_file .= sprintf '%06d', $index-1;
$split_file .= sprintf '%07d', $index-1;

my $tmp_assemble = File::Spec->catdir($tmp, 'assemble');
make_path($tmp_assemble) unless(-e $tmp_assemble);

my $assembled = File::Spec->catfile($tmp_assemble, 'bedpe.');
$assembled .= sprintf '%06d', $index-1;
$assembled .= sprintf '%07d', $index-1;

my $command = "$^X ";
$command .= _which('brass-assemble');
Expand Down
64 changes: 45 additions & 19 deletions setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,28 @@

########## LICENCE ##########
# Copyright (c) 2014 Genome Research Ltd.
#
#
# Author: Cancer Genome Project <cgpit@sanger.ac.uk>
#
#
# This file is part of BRASS.
#
#
# BRASS is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
########## LICENCE ##########


SOURCE_BLAT="http://users.soe.ucsc.edu/~kent/src/blatSrc35.zip"
SOURCE_BEDTOOLS="https://github.com/arq5x/bedtools2/releases/download/v2.21.0/bedtools-2.21.0.tar.gz"

done_message () {
if [ $? -eq 0 ]; then
Expand Down Expand Up @@ -92,20 +93,6 @@ echo > $INIT_DIR/setup.log
echo; echo
) >>$INIT_DIR/setup.log 2>&1

perlmods=( "Graph" )

set -e
for i in "${perlmods[@]}" ; do
echo -n "Installing build prerequisite $i..."
(
set -x
$INIT_DIR/perl/bin/cpanm -v --mirror http://cpan.metacpan.org -l $INST_PATH $i
set +x
echo; echo
) >>$INIT_DIR/setup.log 2>&1
done_message "" "Failed during installation of $i."
done

set -eu

# cleanup inst_path
Expand All @@ -121,12 +108,51 @@ PERLROOT=$INST_PATH/lib/perl5
PERLARCH=$PERLROOT/$ARCHNAME
export PERL5LIB="$PERLROOT:$PERLARCH"

CHK=`perl -le 'eval "require $ARGV[0]" and print $ARGV[0]->VERSION' PCAP`
if [[ "x$CHK" == "x" ]] ; then
echo "PREREQUISITE: Please install PCAP-core before proceeding: https://github.com/ICGC-TCGA-PanCancer/PCAP-core/releases"
exit 1;
fi

perlmods=( "Graph" )

set -e
for i in "${perlmods[@]}" ; do
echo -n "Installing build prerequisite $i..."
(
set -x
perl $INIT_DIR/perl/bin/cpanm -v --mirror http://cpan.metacpan.org -l $INST_PATH $i
set +x
echo; echo
) >>$INIT_DIR/setup.log 2>&1
done_message "" "Failed during installation of $i."
done

#create a location to build dependencies
SETUP_DIR=$INIT_DIR/install_tmp
mkdir -p $SETUP_DIR

cd $SETUP_DIR

echo -n "Building bedtools ..."
if [ -e $SETUP_DIR/bedtools.success ]; then
echo -n " previously installed ...";
else
cd $SETUP_DIR
(
set -x
if [ ! -e bedtools ]; then
get_distro "bedtools2" $SOURCE_BEDTOOLS
mkdir -p bedtools2
tar --strip-components 1 -C bedtools2 -zxf bedtools2.tar.gz
fi
make -C bedtools2 -j$CPU
cp bedtools2/bin/* $INST_PATH/bin/.
touch $SETUP_DIR/bedtools.success
)>>$INIT_DIR/setup.log 2>&1
fi
done_message "" "Failed to build bedtools."

echo -n "Building blat ..."
if [ -e $SETUP_DIR/blat.success ]; then
echo -n " previously installed ..."
Expand Down

0 comments on commit 8e44b1d

Please sign in to comment.