@@ -8,34 +8,33 @@ import sys, string
88
99# zeros() was origianlly from NumPy. Implemented by alevchuk 2011-04-10
1010def zeros (shape ):
11- retval = []
12- for x in range (shape [0 ]):
13- retval .append ([])
14- for y in range (shape [1 ]):
15- retval [- 1 ].append (0 )
16- return retval
11+ retval = []
12+ for x in range (shape [0 ]):
13+ retval .append ([])
14+ for y in range (shape [1 ]):
15+ retval [- 1 ].append (0 )
16+ return retval
1717
18- #read the first sequence
19- f1 = open (sys .argv [1 ], 'r' )
20- seq1 = f1 .readline ()
21- seq1 = string .strip (seq1 )
18+ # Command-line arguments
19+ f1 = open (sys .argv [1 ], 'r' )
20+ seq1 = f1 .readline ()
21+ seq1 = string .strip (seq1 )
2222
23- #read the second sequence
24- f2 = open (sys .argv [2 ], 'r' )
25- seq2 = f2 .readline ()
26- seq2 = string .strip (seq2 )
23+ f2 = open (sys .argv [2 ], 'r' )
24+ seq2 = f2 .readline ()
25+ seq2 = string .strip (seq2 )
2726
28- m ,n = len (seq1 ),len (seq2 ) #length of two sequences
27+ m , n = len (seq1 ), len (seq2 ) # length of two sequences
2928
3029match_award = 10
3130mismatch_penalty = - 5
3231gap_penalty = - 5 # both for opening and extanding
3332
34- #generate DP table and traceback path pointer matrix
35- score = zeros ((m + 1 ,n + 1 )) #the DP table
36- pointer = zeros ((m + 1 ,n + 1 )) #to store the traceback path
33+ # Generate DP table and traceback path pointer matrix
34+ score = zeros ((m + 1 , n + 1 )) # the DP table
35+ pointer = zeros ((m + 1 , n + 1 )) # to store the traceback path
3736
38- P = 0 ;
37+ P = 0
3938
4039def match_score (alpha , beta ):
4140 if alpha == beta :
@@ -45,85 +44,87 @@ def match_score(alpha, beta):
4544 else :
4645 return mismatch_penalty
4746
48- max_score = P ; #initial maximum score in DP table
49-
50- #calculate DP table and mark pointers
51- for i in range (1 ,m + 1 ):
52- for j in range (1 ,n + 1 ):
53- score_up = score [i - 1 ][j ] + gap_penalty
54- score_down = score [i ][j - 1 ] + gap_penalty
55- score_diagonal = score [i - 1 ][j - 1 ]+ match_score (seq1 [i - 1 ],seq2 [j - 1 ]);
56- score [i ][j ]= max (0 ,score_up ,score_down ,score_diagonal );
57- if score [i ][j ]== 0 :
58- pointer [i ][j ]= 0 ; #0 means end of the path
59- if score [i ][j ]== score_up :
60- pointer [i ][j ]= 1 ; #1 means trace up
61- if score [i ][j ]== score_down :
62- pointer [i ][j ]= 2 ; #2 means trace left
63- if score [i ][j ]== score_diagonal :
64- pointer [i ][j ]= 3 ; #3 means trace diagonal
65- if score [i ][j ]>= max_score :
66- max_i = i ;
67- max_j = j ;
68- max_score = score [i ][j ];
69- #END of DP table
70-
71-
72- align1 ,align2 = '' ,'' ; #initial sequences
73-
74- i ,j = max_i ,max_j ; #indices of path starting point
47+ max_score = P # initial maximum score in DP table
48+
49+ # Calculate DP table and mark pointers
50+ for i in range (1 , m + 1 ):
51+ for j in range (1 , n + 1 ):
52+ score_up = score [i - 1 ][j ] + gap_penalty
53+ score_down = score [i ][j - 1 ] + gap_penalty
54+ score_diagonal = score [i - 1 ][j - 1 ] + match_score (seq1 [i - 1 ], seq2 [j - 1 ])
55+ score [i ][j ] = max (0 ,score_up , score_down , score_diagonal )
56+ if score [i ][j ] == 0 :
57+ pointer [i ][j ] = 0 # 0 means end of the path
58+ if score [i ][j ] == score_up :
59+ pointer [i ][j ] = 1 # 1 means trace up
60+ if score [i ][j ] == score_down :
61+ pointer [i ][j ] = 2 # 2 means trace left
62+ if score [i ][j ] == score_diagonal :
63+ pointer [i ][j ] = 3 # 3 means trace diagonal
64+ if score [i ][j ] >= max_score :
65+ max_i = i
66+ max_j = j
67+ max_score = score [i ][j ];
68+
69+
70+ align1 , align2 = '' , '' # initial sequences
71+
72+ i ,j = max_i ,max_j # indices of path starting point
7573
7674#traceback, follow pointers
77- while pointer [i ][j ]!= 0 :
78- if pointer [i ][j ]== 3 :
79- align1 = align1 + seq1 [i - 1 ];
80- align2 = align2 + seq2 [j - 1 ];
81- i = i - 1 ;
82- j = j - 1 ;
83- elif pointer [i ][j ]== 2 :
84- align1 = align1 + '-' ;
85- align2 = align2 + seq2 [j - 1 ];
86- j = j - 1 ;
87- elif pointer [i ][j ]== 1 :
88- align1 = align1 + seq1 [i - 1 ];
89- align2 = align2 + '-' ;
90- i = i - 1 ;
91- #END of traceback
92-
93- align1 = align1 [::- 1 ]; #reverse sequence 1
94- align2 = align2 [::- 1 ]; #reverse sequence 2
95-
96- i ,j = 0 ,0 ;
75+ while pointer [i ][j ] != 0 :
76+ if pointer [i ][j ] == 3 :
77+ align1 = align1 + seq1 [i - 1 ]
78+ align2 = align2 + seq2 [j - 1 ]
79+ i = i - 1
80+ j = j - 1
81+ elif pointer [i ][j ] == 2 :
82+ align1 = align1 + '-'
83+ align2 = align2 + seq2 [j - 1 ]
84+ j = j - 1
85+ elif pointer [i ][j ] == 1 :
86+ align1 = align1 + seq1 [i - 1 ]
87+ align2 = align2 + '-'
88+ i = i - 1
89+
90+ align1 = align1 [::- 1 ] #reverse sequence 1
91+ align2 = align2 [::- 1 ] #reverse sequence 2
92+
93+ i ,j = 0 ,0
9794
9895#calcuate identity, similarity, score and aligned sequeces
99- def result (align1 ,align2 ):
100- symbol = '' ;
101- found = 0 ;
102- score = 0 ;
103- identity ,similarity = 0 , 0 ;
96+ def result (align1 , align2 ):
97+ symbol = ''
98+ found = 0
99+ score = 0
100+ identity , similarity = 0 , 0
104101 for i in range (0 ,len (align1 )):
105- if align1 [i ]== align2 [i ]: #if two AAs are the same, then output the letter
106- symbol = symbol + align1 [i ];
107- identity = identity + 1 ;
108- similarity = similarity + 1 ;
109- score = score + match_score (align1 [i ],align2 [i ]);
110- elif align1 [i ]!= align2 [i ] and align1 [i ]!= '-' and align2 [i ]!= '-' : #if they are not identical and none of them is gap
111- score = score + match_score (align1 [i ],align2 [i ]);
102+ # if two AAs are the same, then output the letter
103+ if align1 [i ] == align2 [i ]:
104+ symbol = symbol + align1 [i ]
105+ identity = identity + 1
106+ similarity = similarity + 1
107+ score += match_score (align1 [i ], align2 [i ])
108+
109+ # if they are not identical and none of them is gap
110+ elif align1 [i ] != align2 [i ] and align1 [i ] != '-' and align2 [i ] != '-' :
111+ score += match_score (align1 [i ], align2 [i ])
112112 symbol += ' '
113- found = 0 ;
114- elif align1 [i ]== '-' or align2 [i ]== '-' : #if one of them is a gap, output a space
115- symbol = symbol + ' ' ;
116- score = score + gap_penalty
113+ found = 0
117114
118- identity = float (identity )/ len (align1 )* 100 ;
119- similarity = float (similarity )/ len (align2 )* 100 ;
115+ #if one of them is a gap, output a space
116+ elif align1 [i ] == '-' or align2 [i ] == '-' :
117+ symbol += ' '
118+ score += gap_penalty
119+
120+ identity = float (identity ) / len (align1 ) * 100
121+ similarity = float (similarity ) / len (align2 ) * 100
120122
121- print 'Identity =' , "%3.3f" % identity , 'percent' ;
122- print 'Similarity =' , "%3.3f" % similarity , 'percent' ;
123- print 'Score =' , score ;
123+ print 'Identity =' , "%3.3f" % identity , 'percent'
124+ print 'Similarity =' , "%3.3f" % similarity , 'percent'
125+ print 'Score =' , score
124126 print align1
125127 print symbol
126128 print align2
127- #END of function result
128129
129- result (align1 ,align2 )
130+ result (align1 , align2 )
0 commit comments