From ed44fdb59fe5c2489ff2b000227615fc0e857f7b Mon Sep 17 00:00:00 2001 From: Sebastian Mackowiak Date: Mon, 9 Sep 2019 13:30:18 +0200 Subject: [PATCH] Output 5p and 3p arms in bed files --- src/get_mirdeep2_precursors.pl | 85 ++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 35 deletions(-) diff --git a/src/get_mirdeep2_precursors.pl b/src/get_mirdeep2_precursors.pl index 472015e..9962a45 100755 --- a/src/get_mirdeep2_precursors.pl +++ b/src/get_mirdeep2_precursors.pl @@ -23,20 +23,15 @@ # and writes a fasta file and a bed file for these # See 'Usage' for more information. # -# Bed files are only created for the precursor sequences. -# -# S.M. Jan. 2012 +# S.M. Jan. 2019 # ###################################### - - use strict; use Getopt::Std; - my %options=(); getopts("r:s:dpht:mkT:o:",\%options); if(not -f $options{'r'} or $options{'h'}){ @@ -65,8 +60,6 @@ $options{'T'} = 'notTrackname'; } - - my @l1=split(/result/,$options{'r'}); my ($timestamp,$dmp)=split(".csv",$l1[1]); @@ -101,10 +94,6 @@ my @names=qw(0 1 2 3 4 5 6 7 8 9 10 11 12 mature star pres); - - - - while(){ if(/novel miRNAs predicted by miRDeep2/){ $novel=1; @@ -136,15 +125,21 @@ close OUT; close BED; open OUT,">$od/not_$names[$seqcol]${timestamp}_score${score}_to_$max.fa"; - open BED,">$od/not_$names[$seqcol]${timestamp}_score${score}_to_$max.bed"; next; }else{} next if(/^\s*$/); + + if($novel or $known){ chomp; @line=split(/\t/); my $coord='na'; $coord=$line[16] if($line[16]); + my $c5p = index($line[15],$line[13]); + my $c3p = index($line[15],$line[14]); + my $l5p = length($line[13]); + my $l3p = length($line[14]); + if($known){ if($line[1] >= $thres and $line[1] < $maxs){ if($options{'d'}){ @@ -163,7 +158,26 @@ $strandcol="0,0,255"; if($coord =~ /^(\S+):(\d+)\.\.(\d+):(\S)$/){ - $pcoord="$1:$2-$3"; + my ($c,$b,$e,$s) = ($1,$2,$3,$4); + if($options{'m'}){ + if($s eq '+'){ + $b+=$c5p; + $e=$b+$l5p; + }else{ + $b=$e-$c5p-$l5p; + $e=$e-$c5p; + } + } + if($options{'k'}){ + if($s eq '+'){ + $b+=$c3p; + $e=$b+$l3p; + }else{ + $b=$e-$c3p-$l3p; + $e=$e-$c3p; + } + } + $pcoord="$1:$b-$e"; $strandcol="255,0,0" if($4 eq "+"); if($first){ $first=0; @@ -173,7 +187,7 @@ $bedh1 =~ s/BPOS/$pcoord/; print BED "$bedh1"; } - print BED "$1\t$2\t$3\t$line[0]\t$line[1]\t$4\t$2\t$3\t$strandcol\n"; + print BED "$c\t$b\t$e\t$line[0]\t$line[1]\t$s\t$b\t$e\t$strandcol\n"; } } }else{ @@ -193,6 +207,26 @@ chomp $coord; $strandcol="0,0,255"; if($coord =~ /^(\S+):(\d+)\.\.(\d+):(\S)$/){ + my ($c,$b,$e,$s) = ($1,$2,$3,$4); + if($options{'m'}){ + if($s eq '+'){ + $b+=$c5p; + $e=$b+$l5p; + }else{ + $b=$e-$c5p-$l5p; + $e=$e-$c5p; + } + } + if($options{'k'}){ + if($s eq '+'){ + $b+=$c3p; + $e=$b+$l3p; + }else{ + $b=$e-$c3p-$l3p; + $e=$e-$c3p; + } + } + #$pcoord="$1:$b-$e"; $strandcol="255,0,0" if($4 eq "+"); if($first){ $first=0; @@ -202,7 +236,7 @@ $bedh1 =~ s/BPOS/$pcoord/; print BED "$bedh1"; } - print BED "$1\t$2\t$3\t$line[0]\t$line[1]\t$4\t$2\t$3\t$strandcol\n"; + print BED "$c\t$b\t$e\t$line[0]\t$line[1]\t$s\t$b\t$e\t$strandcol\n"; } } @@ -212,8 +246,6 @@ if($not){ chomp; @line=split(/\t/); - my $coord='na'; - $coord=$line[16] if($line[16]); if($options{'d'}){ $line[$seqcol] =~ tr/uU/tT/; } @@ -222,23 +254,6 @@ }else{ print OUT ">${line[0]}_x${line[4]}\n",uc($line[$seqcol]),"\n"; } - chomp $coord; - $strandcol="0,0,255"; - if($coord =~ /^(\S+):(\d+)\.\.(\d+):(\S)$/){ - $strandcol="255,0,0" if($4 eq "+"); - if($first){ - $first=0; - $bedh1=$bedh; - $bedh1 =~ s/TNAME/$options{'T'}.not_detected_miRNAs/; - $bedh1 =~ s/TDESC/miRNAs not detected by miRDeep2 for $options{'T'}/; - $bedh1 =~ s/BPOS/$pcoord/; - print BED "$bedh1"; - } - print BED "$1\t$2\t$3\t$line[0]\t$line[1]\t$4\t$2\t$3\t$strandcol\n"; - } - - - next; } }