Skip to content

Commit

Permalink
Merge branch 'release/v2.5.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
keiranmraine committed Jun 14, 2016
2 parents 5b9262e + 3fd0314 commit 4acfdac
Show file tree
Hide file tree
Showing 7 changed files with 230 additions and 28 deletions.
1 change: 1 addition & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ bin/bam_to_sra_sub.pl
bin/bamToBw.pl
bin/bwa_aln.pl
bin/bwa_mem.pl
bin/detectExtremeDepth.pl
bin/diff_bams.pl
bin/gnos_pull.pl
bin/monitor.pl
Expand Down
5 changes: 2 additions & 3 deletions Makefile.PL
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

##########LICENCE##########
# PCAP - NGS reference implementations and helper code for the ICGC/TCGA Pan-Cancer Analysis Project
# Copyright (C) 2014 ICGC PanCancer Project
# Copyright (C) 2014-2016 ICGC PanCancer Project
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
Expand Down Expand Up @@ -33,6 +33,7 @@ WriteMakefile(
bin/bwa_aln.pl
bin/bwa_mem.pl
bin/bam_stats.pl
bin/detectExtremeDepth.pl
bin/diff_bams.pl
bin/monitor.pl
bin/xml_to_bas.pl
Expand All @@ -55,8 +56,6 @@ WriteMakefile(
'Proc::ProcessTable' => 0.50,
'Data::UUID' => 1.219,
'Test::Fatal' => 0.013,
'GD' => 2.52,
'Math::Gradient' => 0.04,
'Devel::Cover' => 1.09,
'Pod::Coverage' => 0.23,
'Term::UI' => 0.42, # currently in core but scheduled for removal 5.17, no alternative recommended
Expand Down
167 changes: 167 additions & 0 deletions bin/detectExtremeDepth.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
#!/usr/bin/perl

##########LICENCE##########
# PCAP - NGS reference implementations and helper code for the ICGC/TCGA Pan-Cancer Analysis Project
# Copyright (C) 2014-2016 ICGC PanCancer Project
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not see:
# http://www.gnu.org/licenses/gpl-2.0.html
##########LICENCE##########

use Cwd qw(abs_path);
use strict;
use English qw( -no_match_vars );
use warnings FATAL => 'all';

use File::Basename;
use Carp;
use Getopt::Long;
use Pod::Usage;

use PCAP;

use Bio::DB::BigWig 'binMean','binStdev';

my %chr_stats;
my @chr_order;

# Main program.
#
# Pass 1: for every reference sequence in the BigWig (optionally restricted
#         with -r) record the mean and standard deviation of the signal.
# Pass 2: stream each retained reference and write, in BED layout, every
#         interval whose score is above mean + (stdev * -s value).
#
# Per-reference statistics and the calculated thresholds are reported on STDERR.
{
  my $options = option_builder();

  my $wig = Bio::DB::BigWig->new(-bigwig=>$options->{'b'});
  # each 'summary' feature is treated as one reference sequence
  my @chroms = $wig->features(-type=>'summary');

  for my $c (@chroms) {
    my $seqid = $c->seq_id;
    # -r may be supplied with or without the 'chr' prefix
    next if(defined $options->{'r'} && $options->{'r'} ne $seqid && 'chr'.$options->{'r'} ne $seqid);

    # request a single bin so the summary describes the whole feature
    my $stats = $c->statistical_summary(1);
    my $s = shift @{$stats};
    die "No summary statistics returned for $seqid\n" if(!defined $s);

    push @chr_order, $seqid;
    $chr_stats{$seqid} = {'mean' => binMean($s),
                          'stdev' => binStdev($s)};
    warn sprintf "%s: mean %.2f, stdev %.2f\n", $seqid, $chr_stats{$seqid}{'mean'}, $chr_stats{$seqid}{'stdev'};
  }

  open my $OFH, '>', $options->{'o'} or die "Failed to create $options->{o}: $!\n";
  for my $chr(@chr_order) {
    my $max_val = $chr_stats{$chr}{'mean'} + ($chr_stats{$chr}{'stdev'} * $options->{'s'});
    warn sprintf "%s: Max depth permitted = %d\n", $chr, $max_val;
    my $iterator = $wig->get_seq_stream(-seq_id=> $chr);
    while (my $p = $iterator->next_seq) {
      next if($p->score <= $max_val);
      # start is shifted down by one, presumably converting a 1-based
      # feature start to the 0-based start used by BED - TODO confirm
      printf $OFH "%s\t%d\t%d\t%d\n", $chr, $p->start-1, $p->end, $p->score;
    }
  }
  # buffered write failures (e.g. disk full) only surface at close
  close $OFH or die "Failed to close $options->{o}: $!\n";
}

# Parse and validate the command line (@ARGV).
#
# Returns a hash ref keyed by the short option letter:
#   b - path of the input BigWig file (checked to exist and be non-empty)
#   o - full path of the output file, built as
#       <output dir>/<BigWig file name without '.bw'>[.<ref>].bed
#   r - reference to restrict processing to, after any -d decoding
#   d - array ref of the raw decode strings ('NUM:NAME'), if any were given
#   s - number of standard deviations above the mean, 12 when not supplied
#
# Exits through pod2usage for -h, for unparsable options and when -b or -o
# is missing; prints the PCAP version and exits for -v.
# Croaks when the BigWig file is missing/empty, when -d is used without -r
# or when a decode string is malformed.
sub option_builder {
  my %opts;

  # GetOptions returns false when it has reported a problem with the command
  # line, treat that as a usage error rather than carrying on regardless
  GetOptions (
    'h|help' => \$opts{'h'},
    'b|bigwig=s' => \$opts{'b'},
    'o|output=s' => \$opts{'o'},
    'r|ref=s' => \$opts{'r'},
    'd|decode=s@' => \$opts{'d'},
    's|sd=n' => \$opts{'s'},
    'v|version' => \$opts{'v'},
  ) or pod2usage(2);

  if(defined $opts{'v'}) {
    print PCAP->VERSION,"\n";
    exit 0;
  }

  pod2usage(0) if($opts{'h'});

  pod2usage(1) if(!$opts{'b'} || !$opts{'o'});

  croak $opts{'b'}.' was not found or is empty' if(!-e $opts{'b'} || !-s $opts{'b'});

  if($opts{'d'}) {
    if(!$opts{'r'}) {
      croak '-d should not be defined without -r';
    }
    # build the NUM => NAME lookup, e.g. '23:X' maps 23 to X
    my %decode;
    foreach my $d_str(@{$opts{'d'}}) {
      if($d_str =~ m/^(\d+)\:(.*)$/) {
        $decode{$1} = $2;
      }
      else {
        croak "Decode string of $d_str is invalid see --help";
      }
    }
    # only translate -r when a mapping exists, otherwise use it as given
    if(defined $decode{$opts{'r'}}) {
      $opts{'r'} = $decode{$opts{'r'}};
    }
  }

  # turn the output directory into the full output file path
  my $fn = fileparse($opts{'b'});
  $fn =~ s/\.bw$//;
  $opts{'o'} .= '/' if($opts{'o'} !~ m/\/$/);
  $opts{'o'} .= $fn;
  if($opts{'r'}) {
    $opts{'o'} .= '.'.$opts{'r'};
  }
  $opts{'o'} .= '.bed';

  # test for defined so that an explicit '-s 0' is honoured
  if(!defined $opts{'s'}) {
    $opts{'s'} = 12;
  }

  return \%opts;
}

__END__
=head1 NAME
detectExtremeDepth.pl - Generate profile of BigWig file and identify regions outside the normal range
=head1 SYNOPSIS
Required parameters:
--bigwig (-b) FILE BigWig file path
--output (-o) DIR Folder to send output to
- named as input file with '.bw' replaced by '.bed'
- if '-r' defined '.{val}' will prefix '.bed'
Optional:
--ref (-r) STR Restrict to this reference (mainly for testing)
- without 'chr' prefix, will test with and without the 'chr' for you.
--decode (-d) STR Decode -r to chromosome names (do not include 'chr')
e.g. -d 23:X -d 24:Y -d 25:MT
--sd (-s) INT Number of standard deviations above mean for group to be included [12]
--help (-h) This message
--version (-v) Version
Examples:
perl ~/detectExtremeDepth.pl -o someplace -b sample.bw
=cut
Binary file modified docs.tar.gz
Binary file not shown.
1 change: 1 addition & 0 deletions lib/PCAP.pm
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ const my %UPGRADE_PATH => ( # all earlier versions need full upgrade
'2.2.1' => '',
'2.3.0' => '',
'2.4.0' => '',
'2.5.0' => '',
);

sub license {
Expand Down
38 changes: 26 additions & 12 deletions lib/PCAP/Bam/Stats.pm
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,18 @@ use Const::Fast qw( const );
use Try::Tiny;
use File::Basename;

use Math::Gradient;
use List::Util qw(sum sum0 first);
use Bio::DB::HTS;
use GD::Image;
use JSON;

my $plots_available = 0;
eval {
require Math::Gradient;
require GD::Image;
$plots_available = 1;
1;
};

const my $PAIRED => 1; # not needed
const my $PROPER => 2;
const my $UNMAPPED => 4;
Expand Down Expand Up @@ -75,7 +81,11 @@ sub init{

my $path = $args{-path};
my $sam;
my $q_scoring = $args{-qscoring};
my $q_scoring = $args{-qscoring} ? 1 : 0;
if($q_scoring && $plots_available == 0) {
$q_scoring = 0;
warn "WARN: quality plots have been disabled as Math::Gradient and GD::Image were not found\n";
}

unless ($sam && ref $sam eq 'Bio::DB::HTS'){
$sam = Bio::DB::HTS->new(-bam => $path);
Expand All @@ -89,10 +99,10 @@ sub init{
}

my $groups = _parse_header($sam);
_process_reads($groups,$sam,$q_scoring, $mod, $rem) unless(defined $args{-no_proc});
$self->{_file_path} = $path;
$self->{_qualiy_scoring} = $q_scoring ? 1 : 0;
$self->{_qualiy_scoring} = $q_scoring;
$self->{_groups} = $groups;
_process_reads($groups,$sam,$q_scoring, $mod, $rem) unless(defined $args{-no_proc});
}

sub merge_json_stats {
Expand Down Expand Up @@ -147,7 +157,6 @@ sub _parse_header {

sub _process_reads {
my ($groups, $sam, $qualiy_scoring, $mod, $rem) = @_;

my $bam = $sam->hts_file;
my $header = $bam->header_read;
my $processed_x = 0;
Expand Down Expand Up @@ -675,8 +684,13 @@ sub fqplots {
for my $rg(keys %{$groups}) {
for my $read(1..2) {
my $plot_vals = $groups->{$rg}->{'fqp_'.$read};
down_pop_quals($plot_vals); # adds mem bloat as undef values are all filled
fastq2image($output_dir_path, $plot_vals, $rg, $read, $groups->{$rg}->{'length_'.$read}, $groups->{$rg}->{'count_'.$read});
if($plot_vals) {
down_pop_quals($plot_vals); # adds mem bloat as undef values are all filled
fastq2image($output_dir_path, $plot_vals, $rg, $read, $groups->{$rg}->{'length_'.$read}, $groups->{$rg}->{'count_'.$read});
}
elsif($rg ne q{.}) {
warn "WARN: No plot_vals found for RG '$rg'\n";
}
delete $groups->{$rg}->{'fqp_'.$read};
}
}
Expand Down Expand Up @@ -754,7 +768,7 @@ sub fastq2image {
my $value;
while (scalar @{$values}) {
if ($cycle_count == 1 && ($quality == 1 || ($quality > 1 && $quality % 5 == 0))) {
$im->string(GD::gdSmallFont, 25, $y1-5, $quality, $black);
$im->string(GD::Font->Small, 25, $y1-5, $quality, $black);
}
$y2 = $y1 + $shift;
$im->filledRectangle($x1,$y1,$x2,$y2, $colours->{int ((pop @{$values}) / $read_pct)});
Expand All @@ -763,7 +777,7 @@ sub fastq2image {
}

if ($cycle_count == 1 || $cycle_count % 5 == 0) {
$im->string(GD::gdSmallFont, $x1, $y1+5, $cycle_count, $black);
$im->string(GD::Font->Small, $x1, $y1+5, $cycle_count, $black);
}
$x1 = $x2;
}
Expand All @@ -775,9 +789,9 @@ sub fastq2image {

my $start_xaxis_label = (int $num_cycles*$shift/2) - 40;
if ($start_xaxis_label < 0) { $start_xaxis_label = 0; }
$im->string(GD::gdSmallFont, $start_xaxis_label, $y1+20, $xaxis_label, $black);
$im->string(GD::Font->Small, $start_xaxis_label, $y1+20, $xaxis_label, $black);

$im->stringUp(GD::gdSmallFont, 5, $height/2, q[Quality], $black);
$im->stringUp(GD::Font->Small, 5, $height/2, q[Quality], $black);

open my $PNG, '>', $out_file;
binmode($PNG);
Expand Down
46 changes: 33 additions & 13 deletions setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ SOURCE_HTSLIB="https://github.com/samtools/htslib/archive/1.3.1.tar.gz"

# for bigwig
SOURCE_JKENT_BIN="https://github.com/ENCODE-DCC/kentUtils/raw/master/bin/linux.x86_64"
# for Bio::DB::BigWig
SOURCE_KENTSRC="http://hgdownload.cse.ucsc.edu/admin/jksrc.zip"
# for fast merging of per-chr BW files
SOURCE_LIB_BW="https://github.com/dpryan79/libBigWig/archive/0.1.6.tar.gz"

# for biobambam
Expand Down Expand Up @@ -191,21 +194,21 @@ else
fi

if [[ ",$COMPILE," == *,biobambam,* ]] ; then
echo -n "Building biobambam ..."
if [ -e $SETUP_DIR/biobambam.success ]; then
echo " previously installed ..."
echo -n "Building biobambam2 ..."
if [ -e $SETUP_DIR/biobambam2.success ]; then
echo " previously installed2 ..."
else
cd $SETUP_DIR
get_distro "biobambam" $SOURCE_BBB_BIN_DIST
mkdir -p biobambam
tar --strip-components 1 -C biobambam -zxf biobambam.tar.gz
get_distro "biobambam2" $SOURCE_BBB_BIN_DIST
mkdir -p biobambam2
tar --strip-components 1 -C biobambam2 -zxf biobambam2.tar.gz
mkdir -p $INST_PATH/bin $INST_PATH/etc $INST_PATH/lib $INST_PATH/share
rm -f biobambam/bin/curl # don't let this file in SSL doesn't work
cp -r biobambam/bin/* $INST_PATH/bin/.
cp -r biobambam/etc/* $INST_PATH/etc/.
cp -r biobambam/lib/* $INST_PATH/lib/.
cp -r biobambam/share/* $INST_PATH/share/.
touch $SETUP_DIR/biobambam.success
rm -f biobambam2/bin/curl # don't let this file in SSL doesn't work
cp -r biobambam2/bin/* $INST_PATH/bin/.
cp -r biobambam2/etc/* $INST_PATH/etc/.
cp -r biobambam2/lib/* $INST_PATH/lib/.
cp -r biobambam2/share/* $INST_PATH/share/.
touch $SETUP_DIR/biobambam2.success
echo
fi
else
Expand Down Expand Up @@ -239,7 +242,7 @@ cd $INIT_DIR
if [[ ",$COMPILE," == *,samtools,* ]] ; then
echo -n "Building Bio::DB::HTS ..."
if [ -e $SETUP_DIR/biohts.success ]; then
echo -n " previously installed ...";
echo " previously installed ...";
else
cd $SETUP_DIR
$CPANM --mirror http://cpan.metacpan.org --notest -l $INST_PATH Module::Build Bio::Root::Version
Expand All @@ -254,6 +257,23 @@ else
echo "Bio::DB::HTS - No change between PCAP versions" # based on samtools tag
fi

# Build the kent source library and install Bio::DB::BigFile against it.
# Skipped when the kentsrc.success marker from an earlier run is present.
echo -n "Building kentsrc + Bio::DB::BigFile ..."
if [ -e $SETUP_DIR/kentsrc.success ]; then
  echo " previously installed ...";
else
  cd $SETUP_DIR
  get_distro "kentsrc" $SOURCE_KENTSRC
  unzip -q kentsrc.zip
  # append -fPIC to the otherwise empty CFLAGS= line in common.mk
  perl -pi -e 's/(\s+CFLAGS=)$/${1}-fPIC/' kent/src/inc/common.mk
  cd kent/src/lib
  export MACHTYPE=i686 # for a 64-bit system
  make
  cd ../
  # exported for the Bio::DB::BigFile install below
  export KENT_SRC=`pwd`
  cd $SETUP_DIR
  $CPANM --mirror http://cpan.metacpan.org -l $INST_PATH Bio::DB::BigFile
  # record completion so later runs take the 'previously installed' branch
  # NOTE(review): assumes the script aborts on a failed command before reaching here - confirm
  touch $SETUP_DIR/kentsrc.success
  echo
fi

cd $INIT_DIR

echo -n "Installing Perl prerequisites ..."
Expand Down

0 comments on commit 4acfdac

Please sign in to comment.