@@ -25,63 +25,34 @@ f2=open(sys.argv[2], 'r')
2525seq2 = f2 .readline ()
2626seq2 = string .strip (seq2 )
2727
28- #read in BLOSUM62 matrix
29- f3 = open (sys .argv [3 ],'r' )
30- BLOSUM62 = []
31- for line in f3 .readlines ():
32- BLOSUM62 .append (map (int , line .split ()))
33-
34- #define similar amino acids
35- similarAA = ['ST' ,'TS' ,'SP' ,'PS' ,'SA' ,'AS' ,'SG' ,'GS' ,'TP' ,'PT' ,'TA' ,'AT' ,'TG' ,'GT' ,'PA' ,'AP' ,'PG' ,'GP' ,'AG' ,'GA' ,'ND' ,'DN' ,'NE' ,'EN' ,'NQ' ,'QN' ,'DE' ,'ED' ,'DQ' ,
36- 'QD' ,'EQ' ,'QE' ,'HR' ,'RH' ,'HK' ,'KH' ,'RK' ,'KR' ,'MI' ,'IM' ,'ML' ,'LM' ,'MV' ,'VM' ,'IL' ,'LI' ,'IV' ,'VI' ,'LV' ,'VL' ,'FY' ,'YF' ,'FW' ,'WF' ,'YW' ,'WY' ];
37-
3828m ,n = len (seq1 ),len (seq2 ) #length of two sequences
3929
40- penalty = - 4 ; #define the gap penalty
30+ match_award = 10
31+ mismatch_penalty = - 5
32+ gap_penalty = - 5 # both for opening and extanding
4133
4234#generate DP table and traceback path pointer matrix
4335score = zeros ((m + 1 ,n + 1 )) #the DP table
4436pointer = zeros ((m + 1 ,n + 1 )) #to store the traceback path
4537
4638P = 0 ;
4739
48- def match_score (alpha ,beta ,BLOSUM62 ): #the function to find match/dismatch score from BLOSUM62 by letters of AAs
49- alphabet = {}
50- alphabet ["A" ] = 0
51- alphabet ["R" ] = 1
52- alphabet ["N" ] = 2
53- alphabet ["D" ] = 3
54- alphabet ["C" ] = 4
55- alphabet ["Q" ] = 5
56- alphabet ["E" ] = 6
57- alphabet ["G" ] = 7
58- alphabet ["H" ] = 8
59- alphabet ["I" ] = 9
60- alphabet ["L" ] = 10
61- alphabet ["K" ] = 11
62- alphabet ["M" ] = 12
63- alphabet ["F" ] = 13
64- alphabet ["P" ] = 14
65- alphabet ["S" ] = 15
66- alphabet ["T" ] = 16
67- alphabet ["W" ] = 17
68- alphabet ["Y" ] = 18
69- alphabet ["V" ] = 19
70- alphabet ["B" ] = 20
71- alphabet ["Z" ] = 21
72- alphabet ["X" ] = 22
73- lut_x = alphabet [alpha ]
74- lut_y = alphabet [beta ]
75- return BLOSUM62 [lut_x ][lut_y ]
40+ def match_score (alpha , beta ):
41+ if alpha == beta :
42+ return match_award
43+ elif alpha == '-' or beta == '-' :
44+ return gap_penalty
45+ else :
46+ return mismatch_penalty
7647
7748max_score = P ; #initial maximum score in DP table
7849
7950#calculate DP table and mark pointers
8051for i in range (1 ,m + 1 ):
8152 for j in range (1 ,n + 1 ):
82- score_up = score [i - 1 ][j ]+ penalty ;
83- score_down = score [i ][j - 1 ]+ penalty ;
84- score_diagonal = score [i - 1 ][j - 1 ]+ match_score (seq1 [i - 1 ],seq2 [j - 1 ], BLOSUM62 );
53+ score_up = score [i - 1 ][j ] + gap_penalty
54+ score_down = score [i ][j - 1 ] + gap_penalty
55+ score_diagonal = score [i - 1 ][j - 1 ]+ match_score (seq1 [i - 1 ],seq2 [j - 1 ]);
8556 score [i ][j ]= max (0 ,score_up ,score_down ,score_diagonal );
8657 if score [i ][j ]== 0 :
8758 pointer [i ][j ]= 0 ; #0 means end of the path
@@ -125,32 +96,24 @@ align2=align2[::-1]; #reverse sequence 2
12596i ,j = 0 ,0 ;
12697
12798#calcuate identity, similarity, score and aligned sequeces
128- def result (align1 ,align2 , BLOSUM62 ):
99+ def result (align1 ,align2 ):
129100 symbol = '' ;
130101 found = 0 ;
131102 score = 0 ;
132- penalty = - 4 ;
133103 identity ,similarity = 0 ,0 ;
134104 for i in range (0 ,len (align1 )):
135105 if align1 [i ]== align2 [i ]: #if two AAs are the same, then output the letter
136106 symbol = symbol + align1 [i ];
137107 identity = identity + 1 ;
138108 similarity = similarity + 1 ;
139- score = score + match_score (align1 [i ],align2 [i ], BLOSUM62 );
109+ score = score + match_score (align1 [i ],align2 [i ]);
140110 elif align1 [i ]!= align2 [i ] and align1 [i ]!= '-' and align2 [i ]!= '-' : #if they are not identical and none of them is gap
141- score = score + match_score (align1 [i ],align2 [i ],BLOSUM62 );
142- for j in range (0 ,len (similarAA )- 1 ):
143- if align1 [i ]+ align2 [i ]== similarAA [j ]: #search whether these two AAs form a pair in similar AA table
144- found = 1 ;
145- if found == 1 :
146- symbol = symbol + ':' ; #if they are similar AA, output ':'
147- similarity = similarity + 1 ;
148- if found == 0 :
149- symbol = symbol + ' ' ; #o/w, output a space
111+ score = score + match_score (align1 [i ],align2 [i ]);
112+ symbol += ' '
150113 found = 0 ;
151114 elif align1 [i ]== '-' or align2 [i ]== '-' : #if one of them is a gap, output a space
152115 symbol = symbol + ' ' ;
153- score = score + penalty ;
116+ score = score + gap_penalty
154117
155118 identity = float (identity )/ len (align1 )* 100 ;
156119 similarity = float (similarity )/ len (align2 )* 100 ;
@@ -163,4 +126,4 @@ def result(align1,align2,BLOSUM62):
163126 print align2
164127#END of function result
165128
166- result (align1 ,align2 , BLOSUM62 )
129+ result (align1 ,align2 )
0 commit comments