-
Notifications
You must be signed in to change notification settings - Fork 49
/
geo2fasta.pl
executable file
·68 lines (39 loc) · 1.24 KB
/
geo2fasta.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/perl
use warnings;
use strict;
use Getopt::Std;
# Usage text, printed (via die) when the script is started without a prefix.
my $usage ="
perl geo2fasta.pl GSM > outfile
reads in all files starting with GEO and produces one big output files containing only valid sequences
modified by Sebastian M
";

# Running index appended to every FASTA header. parse_file_geo() increments
# it once per emitted record, so the index keeps counting across input files.
my $running = 0;

# Test for presence and non-emptiness instead of truthiness, so that a file
# prefix of "0" is accepted rather than mistaken for a missing argument.
if (!@ARGV || !length($ARGV[0])) {
    die $usage;
}

# Convert every "<prefix>*.txt" file into FASTA records on STDOUT.
parse_file_geo($ARGV[0]);
exit;
# parse_file_geo($prefix)
#
# Expands count-collapsed sequence tables into FASTA records on STDOUT.
#
# Every file matching "<prefix>*.txt" is read line by line. A line is used
# when it starts with a non-whitespace token followed by whitespace and an
# unsigned integer; the token is taken as the sequence and the integer as its
# copy number. A sequence consisting only of A, C, G, T, N or U (either case)
# is printed that many times, each copy under its own ">deflated_<n>" header,
# where <n> is the file-scoped counter $running. Any other token with a
# non-zero count is reported once on STDERR and skipped. Lines that do not
# match the pattern are ignored.
#
# Parameters:
#   $prefix - leading part of the input file names. It is handed to glob(),
#             so wildcards and whitespace in it keep their glob meaning.
# Returns: nothing.
# Dies:    when a matched file cannot be opened or closed.
sub parse_file_geo {
    my $prefix = defined $_[0] ? $_[0] : '';

    my @files = glob("${prefix}*.txt");
    warn "no files matching ${prefix}*.txt\n" if !@files;

    for my $file (@files) {
        print STDERR "processing file $file\n";

        # Three-argument open with a lexical handle: the file name can never
        # be interpreted as an open mode, and the handle is not a global.
        open(my $in, '<', $file) or die "Could not open file $file: $!";

        while (my $line = <$in>) {
            my ($seq, $cnt) = $line =~ /^(\S+)\s+(\d+)/ or next;

            # A count of zero produces no output at all, valid or not.
            next if $cnt <= 0;

            # Validity cannot change between copies, so test it only once.
            if ($seq !~ /^[ACGTNUacgtnu]+$/) {
                print STDERR "$running\n$seq\n";
                next;
            }

            # Emit one record per copy, each with a fresh running index.
            while ($cnt-- > 0) {
                print ">deflated\_$running\n";
                print "$seq\n";
                $running++;
            }
        }

        close($in) or die "Could not close file $file: $!";
    }
    return;
}