Skip to content

Commit b72d2e2

Browse files
committed
Changed code style
1 parent 27b6efa commit b72d2e2

File tree

2 files changed

+91
-90
lines changed

2 files changed

+91
-90
lines changed

README

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
Pairwise alignment in Python.
1+
Pairwise string alignment in Python.
22

33
My contribution will be:
44
* Code cleanup

water

Lines changed: 90 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -8,34 +8,33 @@ import sys, string
88

99
# zeros() was origianlly from NumPy. Implemented by alevchuk 2011-04-10
1010
def zeros(shape):
11-
retval = []
12-
for x in range(shape[0]):
13-
retval.append([])
14-
for y in range(shape[1]):
15-
retval[-1].append(0)
16-
return retval
11+
retval = []
12+
for x in range(shape[0]):
13+
retval.append([])
14+
for y in range(shape[1]):
15+
retval[-1].append(0)
16+
return retval
1717

18-
#read the first sequence
19-
f1=open(sys.argv[1], 'r')
20-
seq1=f1.readline()
21-
seq1=string.strip(seq1)
18+
# Command-line arguments
19+
f1 = open(sys.argv[1], 'r')
20+
seq1 = f1.readline()
21+
seq1 = string.strip(seq1)
2222

23-
#read the second sequence
24-
f2=open(sys.argv[2], 'r')
25-
seq2=f2.readline()
26-
seq2=string.strip(seq2)
23+
f2 = open(sys.argv[2], 'r')
24+
seq2 = f2.readline()
25+
seq2 = string.strip(seq2)
2726

28-
m,n = len(seq1),len(seq2) #length of two sequences
27+
m, n = len(seq1), len(seq2) # length of two sequences
2928

3029
match_award = 10
3130
mismatch_penalty = -5
3231
gap_penalty = -5 # both for opening and extanding
3332

34-
#generate DP table and traceback path pointer matrix
35-
score=zeros((m+1,n+1)) #the DP table
36-
pointer=zeros((m+1,n+1)) #to store the traceback path
33+
# Generate DP table and traceback path pointer matrix
34+
score = zeros((m+1, n+1)) # the DP table
35+
pointer = zeros((m+1, n+1)) # to store the traceback path
3736

38-
P=0;
37+
P = 0
3938

4039
def match_score(alpha, beta):
4140
if alpha == beta:
@@ -45,85 +44,87 @@ def match_score(alpha, beta):
4544
else:
4645
return mismatch_penalty
4746

48-
max_score=P; #initial maximum score in DP table
49-
50-
#calculate DP table and mark pointers
51-
for i in range(1,m+1):
52-
for j in range(1,n+1):
53-
score_up=score[i-1][j] + gap_penalty
54-
score_down=score[i][j-1] + gap_penalty
55-
score_diagonal=score[i-1][j-1]+match_score(seq1[i-1],seq2[j-1]);
56-
score[i][j]=max(0,score_up,score_down,score_diagonal);
57-
if score[i][j]==0:
58-
pointer[i][j]=0; #0 means end of the path
59-
if score[i][j]==score_up:
60-
pointer[i][j]=1; #1 means trace up
61-
if score[i][j]==score_down:
62-
pointer[i][j]=2; #2 means trace left
63-
if score[i][j]==score_diagonal:
64-
pointer[i][j]=3; #3 means trace diagonal
65-
if score[i][j]>=max_score:
66-
max_i=i;
67-
max_j=j;
68-
max_score=score[i][j];
69-
#END of DP table
70-
71-
72-
align1,align2='',''; #initial sequences
73-
74-
i,j=max_i,max_j; #indices of path starting point
47+
max_score = P # initial maximum score in DP table
48+
49+
# Calculate DP table and mark pointers
50+
for i in range(1, m + 1):
51+
for j in range(1, n + 1):
52+
score_up = score[i-1][j] + gap_penalty
53+
score_down = score[i][j-1] + gap_penalty
54+
score_diagonal = score[i-1][j-1] + match_score(seq1[i-1], seq2[j-1])
55+
score[i][j] = max(0,score_up, score_down, score_diagonal)
56+
if score[i][j] == 0:
57+
pointer[i][j] = 0 # 0 means end of the path
58+
if score[i][j] == score_up:
59+
pointer[i][j] = 1 # 1 means trace up
60+
if score[i][j] == score_down:
61+
pointer[i][j] = 2 # 2 means trace left
62+
if score[i][j] == score_diagonal:
63+
pointer[i][j] = 3 # 3 means trace diagonal
64+
if score[i][j] >= max_score:
65+
max_i = i
66+
max_j = j
67+
max_score = score[i][j];
68+
69+
70+
align1, align2 = '', '' # initial sequences
71+
72+
i,j = max_i,max_j # indices of path starting point
7573

7674
#traceback, follow pointers
77-
while pointer[i][j]!=0:
78-
if pointer[i][j]==3:
79-
align1=align1+seq1[i-1];
80-
align2=align2+seq2[j-1];
81-
i=i-1;
82-
j=j-1;
83-
elif pointer[i][j]==2:
84-
align1=align1+'-';
85-
align2=align2+seq2[j-1];
86-
j=j-1;
87-
elif pointer[i][j]==1:
88-
align1=align1+seq1[i-1];
89-
align2=align2+'-';
90-
i=i-1;
91-
#END of traceback
92-
93-
align1=align1[::-1]; #reverse sequence 1
94-
align2=align2[::-1]; #reverse sequence 2
95-
96-
i,j=0,0;
75+
while pointer[i][j] != 0:
76+
if pointer[i][j] == 3:
77+
align1 = align1 + seq1[i-1]
78+
align2 = align2 + seq2[j-1]
79+
i = i - 1
80+
j = j - 1
81+
elif pointer[i][j] == 2:
82+
align1 = align1 + '-'
83+
align2 = align2 + seq2[j-1]
84+
j = j - 1
85+
elif pointer[i][j] == 1:
86+
align1 = align1 + seq1[i-1]
87+
align2 = align2 + '-'
88+
i = i - 1
89+
90+
align1 = align1[::-1] #reverse sequence 1
91+
align2 = align2[::-1] #reverse sequence 2
92+
93+
i,j = 0,0
9794

9895
#calcuate identity, similarity, score and aligned sequeces
99-
def result(align1,align2):
100-
symbol='';
101-
found=0;
102-
score=0;
103-
identity,similarity=0,0;
96+
def result(align1, align2):
97+
symbol = ''
98+
found = 0
99+
score = 0
100+
identity, similarity = 0, 0
104101
for i in range(0,len(align1)):
105-
if align1[i]==align2[i]: #if two AAs are the same, then output the letter
106-
symbol=symbol+align1[i];
107-
identity=identity+1;
108-
similarity=similarity+1;
109-
score=score+match_score(align1[i],align2[i]);
110-
elif align1[i]!=align2[i] and align1[i]!='-' and align2[i]!='-': #if they are not identical and none of them is gap
111-
score=score+match_score(align1[i],align2[i]);
102+
# if two AAs are the same, then output the letter
103+
if align1[i] == align2[i]:
104+
symbol = symbol + align1[i]
105+
identity = identity + 1
106+
similarity = similarity + 1
107+
score += match_score(align1[i], align2[i])
108+
109+
# if they are not identical and none of them is gap
110+
elif align1[i] != align2[i] and align1[i] != '-' and align2[i] != '-':
111+
score += match_score(align1[i], align2[i])
112112
symbol += ' '
113-
found=0;
114-
elif align1[i]=='-' or align2[i]=='-': #if one of them is a gap, output a space
115-
symbol=symbol+' ';
116-
score = score + gap_penalty
113+
found = 0
117114

118-
identity=float(identity)/len(align1)*100;
119-
similarity=float(similarity)/len(align2)*100;
115+
#if one of them is a gap, output a space
116+
elif align1[i] == '-' or align2[i] == '-':
117+
symbol += ' '
118+
score += gap_penalty
119+
120+
identity = float(identity) / len(align1) * 100
121+
similarity = float(similarity) / len(align2) * 100
120122

121-
print 'Identity =', "%3.3f" % identity, 'percent';
122-
print 'Similarity =', "%3.3f" % similarity, 'percent';
123-
print 'Score =', score;
123+
print 'Identity =', "%3.3f" % identity, 'percent'
124+
print 'Similarity =', "%3.3f" % similarity, 'percent'
125+
print 'Score =', score
124126
print align1
125127
print symbol
126128
print align2
127-
#END of function result
128129

129-
result(align1,align2)
130+
result(align1, align2)

0 commit comments

Comments
 (0)