Skip to content

Commit

Permalink
Now filter out reads overlapping high depth regions and handle insert…
Browse files Browse the repository at this point in the history
… size hen not available in the BAM file correctly, release version 2.1.0
  • Loading branch information
keiranmraine committed Dec 16, 2014
1 parent dbeac97 commit 5eeebeb
Show file tree
Hide file tree
Showing 7 changed files with 55 additions and 38 deletions.
5 changes: 0 additions & 5 deletions perl/bin/brass-assemble
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,6 @@ use Getopt::Long qw(:config bundling no_ignore_case);
use Bio::DB::Sam;
use Data::Dumper;

BEGIN {
$SIG{__WARN__} = sub {warn $_[0] unless( $_[0] =~ m/^Subroutine Tabix.* redefined/)};
};
use Tabix;

use Graph;
use Graph::Reader::Velvet;

Expand Down
1 change: 1 addition & 0 deletions perl/bin/brass.pl
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
sub cleanup {
my $options = shift;
my $tmpdir = $options->{'tmp'};
system("cp $tmpdir/*.brm.bam $options->{outdir}/.");
move(File::Spec->catdir($tmpdir, 'logs'), File::Spec->catdir($options->{'outdir'}, 'logs')) || die $!;
my $basefile = File::Spec->catfile($options->{'outdir'}, $options->{'safe_tumour_name'}.'_vs_'.$options->{'safe_normal_name'});

Expand Down
11 changes: 5 additions & 6 deletions perl/bin/brassI_prep_bam.pl
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,21 @@

########## LICENCE ##########
# Copyright (c) 2014 Genome Research Ltd.
#
#
# Author: Cancer Genome Project <cgpit@sanger.ac.uk>
#
#
# This file is part of BRASS.
#
#
# BRASS is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
########## LICENCE ##########
Expand All @@ -30,7 +30,6 @@
use strict;
use warnings FATAL => 'all';

use Bio::DB::Sam;
use Capture::Tiny qw { capture };
use PCAP::Bam::Bas;
use List::Util qw(first);
Expand Down
Binary file modified perl/docs.tar.gz
Binary file not shown.
26 changes: 22 additions & 4 deletions perl/lib/Bio/Brass/ReadSelection.pm
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ use List::Util qw(first);
use File::Spec;
use File::Temp qw(tmpnam);
use Bio::Brass qw($VERSION);
use PCAP::Bam::Bas;

=head1 NAME
Expand Down Expand Up @@ -120,18 +121,35 @@ sub new {
}
$sample_names{$1}++ if(/\tSM:([^\t]+)/);
}
$max = 500 if($max == 0);
$max = insert_from_bas($filename) if($max == 0);

warn "Multiple sample names detected in |$filename|" if scalar keys %sample_names > 1;

my @names = keys %sample_names;

$self->{sample_name} = $names[0] if scalar @names;
$self->{maxmaxins} = $max; # this isn't relevant for assembly
$self->{maxmaxins} = $max;

return bless $self, $class;
}

sub insert_from_bas {
my $filename = shift;
my $max_mi = 0;
my $bas = "$filename.bas";
if(-e $bas && -s _) {
my $bas_ob = PCAP::Bam::Bas->new($bas);
my @groups = $bas_ob->read_groups;
for my $rg_id(@groups) {
my $mean_insert_size = $bas_ob->get($rg_id, 'mean_insert_size');
my $insert_size_sd = $bas_ob->get($rg_id, 'insert_size_sd');
my $mi = int( $mean_insert_size + (2 * $insert_size_sd) );
$max_mi = $mi if($mi > $max_mi);
}
}
return $max_mi;
}

sub maxmaxins {
return shift->{maxmaxins};
}
Expand Down Expand Up @@ -165,8 +183,8 @@ quality mapping of the single end.
sub _get_reads {
my ($self, $chr, $pos5, $pos3, $strand, $k, $q, $abort_reads) = @_;

my $normalpos5 = $pos5 - 100;
my $normalpos3 = $pos3 + 100;
my $normalpos5 = $pos5 - $self->{maxmaxins};
my $normalpos3 = $pos3 + $self->{maxmaxins};

my $mutpos5 = ($strand eq '+')? $normalpos5 : $pos5;
my $mutpos3 = ($strand eq '+')? $pos3 : $normalpos3;
Expand Down
16 changes: 7 additions & 9 deletions perl/lib/Sanger/CGP/Brass/Implement.pm
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ use File::Copy qw(move);
use PCAP::Threaded;
use PCAP::Bam;

const my $ASSEMBLE_SPLIT => 100;
const my $ASSEMBLE_SPLIT => 30;

## input
const my $BED_INTERSECT_V => q{ intersect -ubam -v -abam %s -b %s };
Expand All @@ -67,7 +67,7 @@ const my $BRASS_FILTER => q{ -seq_depth 25.1 -blat %s -ref %s -tumour %s -infile
#perl ~kr2/git/brass/perl/bin/brassI_filter.pl

## assemble
const my $BRASS_ASSEMBLE => q{ -X -m mem -O bedpe -e %s -r %s -T %s -o %s %s %s:%s.bai %s:%s.bai};
const my $BRASS_ASSEMBLE => q{ -X -m mem -O bedpe -r %s -T %s -o %s %s %s:%s.bai %s:%s.bai};
# extreme depth, genome.fa, tmp, output.tab, groups, tumourbam, tumourbam, normalbam, normalbam

## grass
Expand Down Expand Up @@ -181,14 +181,13 @@ sub split_filtered {
my $tmp = $options->{'tmp'};
return 1 if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), 0);


my $groups = File::Spec->catfile($options->{'outdir'}, $options->{'safe_tumour_name'}.'_vs_'.$options->{'safe_normal_name'}.'.groups.filtered.bedpe');

my $split_dir = File::Spec->catdir($tmp, 'split');
remove_tree($split_dir) if(-d $split_dir);
make_path($split_dir);
my $command = sprintf 'split --suffix-length=6 --numeric-suffixes --verbose --lines=%s %s %s/split.',
$ASSEMBLE_SPLIT, $groups, $split_dir;
my $command = sprintf q{grep -v '^#' %s | split --suffix-length=7 --numeric-suffixes --verbose --lines=%s - %s/split.},
$groups, $ASSEMBLE_SPLIT, $split_dir;

PCAP::Threaded::external_process_handler(File::Spec->catdir($tmp, 'logs'), $command, 0);

Expand All @@ -210,18 +209,17 @@ sub assemble {

my $split_dir = File::Spec->catdir($tmp, 'split');
my $split_file = File::Spec->catfile($split_dir, 'split.');
$split_file .= sprintf '%06d', $index-1;
$split_file .= sprintf '%07d', $index-1;

my $tmp_assemble = File::Spec->catdir($tmp, 'assemble');
make_path($tmp_assemble) unless(-e $tmp_assemble);

my $assembled = File::Spec->catfile($tmp_assemble, 'bedpe.');
$assembled .= sprintf '%06d', $index-1;
$assembled .= sprintf '%07d', $index-1;

my $command = "$^X ";
$command .= _which('brass-assemble');
$command .= sprintf $BRASS_ASSEMBLE, $options->{'depth'},
$options->{'genome'},
$command .= sprintf $BRASS_ASSEMBLE, $options->{'genome'},
$tmp_assemble,
$assembled,
$split_file,
Expand Down
34 changes: 20 additions & 14 deletions setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -93,20 +93,6 @@ echo > $INIT_DIR/setup.log
echo; echo
) >>$INIT_DIR/setup.log 2>&1

perlmods=( "Graph" )

set -e
for i in "${perlmods[@]}" ; do
echo -n "Installing build prerequisite $i..."
(
set -x
$INIT_DIR/perl/bin/cpanm -v --mirror http://cpan.metacpan.org -l $INST_PATH $i
set +x
echo; echo
) >>$INIT_DIR/setup.log 2>&1
done_message "" "Failed during installation of $i."
done

set -eu

# cleanup inst_path
Expand All @@ -122,6 +108,26 @@ PERLROOT=$INST_PATH/lib/perl5
PERLARCH=$PERLROOT/$ARCHNAME
export PERL5LIB="$PERLROOT:$PERLARCH"

CHK=`perl -le 'eval "require $ARGV[0]" and print $ARGV[0]->VERSION' PCAP`
if [[ "x$CHK" == "x" ]] ; then
echo "PREREQUISITE: Please install PCAP-core before proceeding: https://github.com/ICGC-TCGA-PanCancer/PCAP-core/releases"
exit 1;
fi

perlmods=( "Graph" )

set -e
for i in "${perlmods[@]}" ; do
echo -n "Installing build prerequisite $i..."
(
set -x
perl $INIT_DIR/perl/bin/cpanm -v --mirror http://cpan.metacpan.org -l $INST_PATH $i
set +x
echo; echo
) >>$INIT_DIR/setup.log 2>&1
done_message "" "Failed during installation of $i."
done

#create a location to build dependencies
SETUP_DIR=$INIT_DIR/install_tmp
mkdir -p $SETUP_DIR
Expand Down

0 comments on commit 5eeebeb

Please sign in to comment.