Skip to content

Commit e5f60b4

Browse files
Add files via upload
my $help = "$version\n$usage\nextends perfectly-matched query in both orientations from set of read sequences\n" . "test sequence files should have single line for each sequence as in fastq\n" . "note that query must be short and fit entirely within read\n" . "implements tests for palindromic query and for query duplication in a read\n" . "-q query sequence\n" . "-f flank to extend: l=left, r=right (default=$flank)\n" . "-s suppress extensions below this length (default=$supp)\n" . "-c case of read sequences: u=upper-case, l=lower-case (default=$case)
1 parent 9a9101d commit e5f60b4

File tree

1 file changed

+78
-0
lines changed

1 file changed

+78
-0
lines changed

readStepper.pl

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
#! /usr/bin/perl -w
2+
use strict ;
3+
use Getopt::Long ;
4+
5+
# ARGUMENTS
6+
my ( $query , $flank , $supp , $case ) = ( '' , 'r' , 0 , 'u' );
7+
my $version = "$0 1.1 (Jan 2013)" ;
8+
my $usage = "usage: perl $0 [options] -q query seqfiles\n" ;
9+
my $help = "$version\n$usage\nextends perfectly-matched query in both orientations from set of read sequences\n" .
10+
"test sequence files should have single line for each sequence as in fastq\n" .
11+
"note that query must be short and fit entirely within read\n" .
12+
"implements tests for palindromic query and for query duplication in a read\n" .
13+
"-q query sequence\n" .
14+
"-f flank to extend: l=left, r=right (default=$flank)\n" .
15+
"-s suppress extensions below this length (default=$supp)\n" .
16+
"-c case of read sequences: u=upper-case, l=lower-case (default=$case)\n" .
17+
"Additional options: -version, -usage, -verbose, -help" ;
18+
19+
my ( $verbose , $stdCommand ) ;
20+
die "\n$help\n\n" unless ( @ARGV > 0 ) ;
21+
my $options_okay = GetOptions (
22+
'q=s' => \$query ,
23+
'f=s' => \$flank ,
24+
's=i' => \$supp ,
25+
'c=s' => \$case ,
26+
'verbose' => \$verbose ,
27+
'version' => sub { $stdCommand = $version } ,
28+
'usage' => sub { $stdCommand = $usage } ,
29+
'help' => sub { $stdCommand = $help } ,
30+
);
31+
die "\n$stdCommand\n\n" if ( $stdCommand ) ;
32+
die "\n$help\n\n" if !$options_okay ;
33+
die "\n$help\n\n" if !@ARGV || !$query || $query =~ /[^ACGTRYMKVBHD]/i || $supp =~ /[^0-9]/ || $case !~ /^[ul]$/ || $f
34+
lank !~ /^[lr]$/ ;
35+
36+
my @files = ( @ARGV ) ;
37+
for ( @files ) { die "No file $_\n" unless -f $_ }
38+
$query = uc $query ; $query = lc $query if $case eq 'l' ;
39+
my $rquery = Revcomp ( $query ) ;
40+
if ( $query eq $rquery ) { warn "WARNING: palindromic query\n" }
41+
42+
my ( @hits1 , @hits2 , %ct ) ;
43+
my $longest = 0 ;
44+
for ( @files ) { # GET FORWARD QUERY
45+
if ($_ =~ /\.gz$/) {push @hits1 , `zcat $_ | grep $query`}
46+
else {push @hits1 , `grep $query $_` }
47+
}
48+
for ( @hits2 ) {
49+
chomp ;
50+
my $dupetest = $_ ;
51+
if ( $dupetest =~ s/$query//g > 1 ) { warn "query $query duplicated in read $_\n" }
52+
if ( $flank eq 'r' ) { s/$rquery.*// }
53+
else {
54+
s/.*$rquery// ;
55+
if ( length $_ > $longest ) { $longest = length $_ }
56+
}
57+
$_ = Revcomp ( $_ ) if $flank eq 'r' ;
58+
}
59+
#push @hits1 , ( @hits2 ) ;
60+
61+
$ct{hit} = 0;
62+
for ( sort (@hits1, @hits2) ) {
63+
my $len = length $_ ;
64+
if ( $len < $supp ) { $ct{supp} ++ ; next }
65+
$ct{hit} ++;
66+
if ( $flank eq 'l' ) { $_ = ' ' x ( $longest - $len ) . Revcomp ( $_ ) }
67+
print "$_\n" ;
68+
}
69+
$ct{supp} = 0 unless $ct{supp} ;
70+
my $sum = $ct{supp} + $ct{hit};
71+
print "$sum hits ($ct{hit} after omitting any < $supp nt) to $flank of $query in @ARGV\n";
72+
73+
sub Revcomp { # Compute the reverse complement of DNA sequence, even if degenerate
74+
my $dna = $_[0];
75+
my $revcomp = reverse $dna;
76+
$revcomp =~ tr/ACGTRYMKVBHDacgtrymkvbhd/TGCAYRKMBVDHtgcayrkmbvdh/; #N,S,W unchanged
77+
return $revcomp;
78+
}

0 commit comments

Comments
 (0)