-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathconvert_format_augustus.pl
executable file
·67 lines (61 loc) · 1.79 KB
/
convert_format_augustus.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#! /usr/bin/env perl
use strict;
use warnings;
my ($input,$output)=@ARGV;
die "Usage: $0 <input gff> <output gff>" if(@ARGV ne 2 );
my %gff;
my %exon;
my $geneid;
my $start;
open(I,"< $input") or die "Cannot open $input\n";
while (<I>) {
chomp;
next if(/^#/);
next if /^$/;
my @a=split(/\t/);
$a[0] =~ /:[0-9]*-/;
my $offset = $&;
$offset =~ tr/:-//d;
$offset = $offset - 1;
$a[0] =~ s/_sliding:[0-9]*-[0-9]*//;
if($a[3]>$a[4]){
#swap
my $tmp=$a[4];
$a[4]=$a[3];
$a[3]=$tmp;
}
$a[1]=lc($a[1]);
$a[3]=$a[3] + $offset;
$a[4]=$a[4] + $offset;
if ($a[2] eq 'gene'){
$geneid=$a[8];
$start=$a[3];
$a[8]="ID=$a[8]";
$gff{$a[0]}{$start}{$geneid}{gff} .= join("\t",@a)."\n";
}elsif($a[2] eq 'transcript'){
$a[8]="ID=$a[8];Parent=$geneid";
$a[2]="mRNA";
$gff{$a[0]}{$start}{$geneid}{gff} .= join("\t",@a)."\n";
}elsif($a[2] eq 'exon'){
$a[8]=~/transcript_id\s+\"([^\"]+)\";\s+gene_id\s+\"([^\"]+)\";/ or die "$_\n";
$exon{$1}++;
$a[8]="ID=$1.exon$exon{$1};Parent=$1";
$gff{$a[0]}{$start}{$geneid}{gff} .= join("\t",@a)."\n";
}elsif($a[2] eq 'CDS'){
$a[8]=~/transcript_id\s+\"([^\"]+)\";\s+gene_id\s+\"([^\"]+)\";/ or die "$_\n";
$a[8]="ID=cds.$1;Parent=$1";
$gff{$a[0]}{$start}{$geneid}{gff} .= join("\t",@a)."\n";
$gff{$a[0]}{$start}{$geneid}{len} += abs($a[4]-$a[3])+1;
}
}
close I;
open(O,"> $output") or die "Cannot create $output\n";
foreach my $chr(sort keys %gff){
for my $pos (sort{$a<=>$b} keys %{$gff{$chr}}){
for my $gene (sort keys %{$gff{$chr}{$pos}}){
next if $gff{$chr}{$pos}{$gene}{len} < 150;
print O "$gff{$chr}{$pos}{$gene}{gff}";
}
}
}
close O;