Skip to content

Commit

Permalink
add MTR plugin
Browse files Browse the repository at this point in the history
  • Loading branch information
William McLaren committed Dec 8, 2017
1 parent 33907b0 commit e4dc4ed
Showing 1 changed file with 149 additions and 0 deletions.
149 changes: 149 additions & 0 deletions MTR.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
=head1 CONTACT
Slave Petrovski <slavep@unimelb.edu.au>
Michael Silk <silkm@student.unimelb.edu.au>
=cut

=head1 NAME
MTR
=head1 SYNOPSIS
mv MTR.pm ~/.vep/Plugins
perl variant_effect_predictor.pl -i variations.vcf --plugin MTR,mtrflatfile_1.0.txt.gz
=head1 DESCRIPTION
A VEP plugin that retrieves Missense Tolerance Ratio (MTR) scores for
variants from a tabix-indexed flat file.
MTR scores quantify the amount of purifying selection acting
specifically on missense variants in a given window of protein-coding
sequence. It is estimated across a sliding window of 31 codons and uses
observed standing variation data from the WEST component of the Exome
Aggregation Consortium Database (ExAC), version 2.0
(http://gnomad.broadinstitute.org).
Please cite the MTR publication alongside the VEP if you use this resource:
http://genome.cshlp.org/content/27/10/1715
The Bio::DB::HTS perl library or tabix utility must be installed in your path
to use this plugin. MTR flat files can be downloaded from:
http://xxxxx
=cut

package MTR;

use strict;
use warnings;

use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);

use Bio::EnsEMBL::Variation::Utils::BaseVepTabixPlugin;

use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepTabixPlugin);

sub new {
my $class = shift;
my $self = $class->SUPER::new(@_);

# test tabix
die "ERROR: tabix does not seem to be in your path\n" unless `which tabix 2>&1` =~ /tabix$/;

# get MTR file
my $file = $self->params->[0];
$self->add_file($file);

# remote files?
if($file =~ /tp\:\/\//) {
my $remote_test = `tabix -f $file 1:1-1 2>&1`;
if($remote_test && $remote_test !~ /get_local_version/) {
die "$remote_test\nERROR: Could not find file or index file for remote annotation file $file\n";
}
}

# check files exist
else {
die "ERROR: MTR file $file not found\n" unless -e $file;
die "ERROR: Tabix index file $file\.tbi not found - perhaps you need to create it first?\n" unless -e $file.'.tbi';
}

# get headers and store on self
open HEAD, "tabix -fh $file 1:1-1 2>&1 | ";
while(<HEAD>) {
next unless /^\#/;
chomp;
$self->{headers} = [split];
}
close HEAD;

return $self;
}

sub feature_types {
return ['Transcript'];
}

sub variation_feature_types {
return ['VariationFeature'];
}

sub get_header_info {
my $self = shift;
return {
MTR => 'MTR score',
FDR => 'MTR false discovery rate adjusted binomial exact test.',
MTR_centile => 'MTR percentile'
}
}

sub run {
my ($self, $tva) = @_;
my $vf = $tva->variation_feature;

# get allele, reverse comp if needed
my $allele = $tva->variation_feature_seq;
reverse_comp(\$allele) if $vf->{strand} < 0;

return {} unless $allele =~ /^[ACGT]$/;

my $tr_id = $tva->transcript->stable_id;

# data is written by pos, allele, transcript ID (feature)
# grep lines read in matched on position so that they also are matched on allele and transcript ID
my ($res) = grep {
$_->{Genomic_position} eq $vf->{start} &&
$_->{Genomic_position} eq $vf->{end} &&
$_->{alt} eq $allele &&
$_->{Feature} eq $tr_id
} @{$self->get_data($vf->{chr}, $vf->{start} - 1, $vf->{end})};

# return only the keys defined by get_header_info()
return $res ? { map {$_ => $res->{$_}} grep {defined($res->{$_}) && $res->{$_} ne '.'} keys %{$self->get_header_info} } : {};
}

sub parse_data {
my ($self, $line) = @_;

$line =~ s/\r$//g;

my @split = split /\t/, $line;

# parse data into hash of col names and values
my %data = map {$self->{headers}->[$_] => $split[$_]} (0..(scalar @{$self->{headers}} - 1));

return \%data;
}

sub get_start {
return $_[1]->{'Genomic_position'};
}

sub get_end {
return $_[1]->{'Genomic_position'};
}

1;

0 comments on commit e4dc4ed

Please sign in to comment.