@@ -26,57 +26,7 @@ def match_score(alpha, beta):
2626 else :
2727 return mismatch_penalty
2828
29- def water (seq1 , seq2 ):
30- m , n = len (seq1 ), len (seq2 ) # length of two sequences
31-
32- # Generate DP table and traceback path pointer matrix
33- score = zeros ((m + 1 , n + 1 )) # the DP table
34- pointer = zeros ((m + 1 , n + 1 )) # to store the traceback path
35-
36- P = 0
37- max_score = P # initial maximum score in DP table
38-
39- # Calculate DP table and mark pointers
40- for i in range (1 , m + 1 ):
41- for j in range (1 , n + 1 ):
42- score_up = score [i - 1 ][j ] + gap_penalty
43- score_down = score [i ][j - 1 ] + gap_penalty
44- score_diagonal = score [i - 1 ][j - 1 ] + match_score (seq1 [i - 1 ], seq2 [j - 1 ])
45- score [i ][j ] = max (0 ,score_up , score_down , score_diagonal )
46- if score [i ][j ] == 0 :
47- pointer [i ][j ] = 0 # 0 means end of the path
48- if score [i ][j ] == score_up :
49- pointer [i ][j ] = 1 # 1 means trace up
50- if score [i ][j ] == score_down :
51- pointer [i ][j ] = 2 # 2 means trace left
52- if score [i ][j ] == score_diagonal :
53- pointer [i ][j ] = 3 # 3 means trace diagonal
54- if score [i ][j ] >= max_score :
55- max_i = i
56- max_j = j
57- max_score = score [i ][j ];
58-
59-
60- align1 , align2 = '' , '' # initial sequences
61-
62- i ,j = max_i ,max_j # indices of path starting point
63-
64- #traceback, follow pointers
65- while pointer [i ][j ] != 0 :
66- if pointer [i ][j ] == 3 :
67- align1 = align1 + seq1 [i - 1 ]
68- align2 = align2 + seq2 [j - 1 ]
69- i = i - 1
70- j = j - 1
71- elif pointer [i ][j ] == 2 :
72- align1 = align1 + '-'
73- align2 = align2 + seq2 [j - 1 ]
74- j = j - 1
75- elif pointer [i ][j ] == 1 :
76- align1 = align1 + seq1 [i - 1 ]
77- align2 = align2 + '-'
78- i = i - 1
79-
29+ def finalize (align1 , align2 ):
8030 align1 = align1 [::- 1 ] #reverse sequence 1
8131 align2 = align2 [::- 1 ] #reverse sequence 2
8232
@@ -112,3 +62,107 @@ def water(seq1, seq2):
11262 print align1
11363 print symbol
11464 print align2
65+
66+
67+ def needle (seq1 , seq2 ):
68+ m , n = len (seq1 ), len (seq2 ) # length of two sequences
69+
70+ # Generate DP table and traceback path pointer matrix
71+ score = zeros ((m + 1 , n + 1 )) # the DP table
72+
73+ # Calculate DP table
74+ for i in range (0 , m + 1 ):
75+ score [i ][0 ] = gap_penalty * i
76+ for j in range (0 , n + 1 ):
77+ score [0 ][j ] = gap_penalty * j
78+ for i in range (1 , m + 1 ):
79+ for j in range (1 , n + 1 ):
80+ match = score [i - 1 ][j - 1 ] + match_score (seq1 [i - 1 ], seq2 [j - 1 ])
81+ delete = score [i - 1 ][j ] + gap_penalty
82+ insert = score [i ][j - 1 ] + gap_penalty
83+ score [i ][j ] = max (match , delete , insert )
84+
85+ # Traceback and compute the alignment
86+ align1 , align2 = '' , ''
87+ i ,j = m ,n # start from the bottom right cell
88+ while i > 0 and j > 0 : # end toching the top or the left edge
89+ score_current = score [i ][j ]
90+ score_diagonal = score [i - 1 ][j - 1 ]
91+ score_up = score [i ][j - 1 ]
92+ score_left = score [i - 1 ][j ]
93+
94+ if score_current == score_diagonal + match_score (seq1 [i - 1 ], seq2 [j - 1 ]):
95+ align1 += seq1 [i - 1 ]
96+ align2 += seq2 [j - 1 ]
97+ i -= 1
98+ j -= 1
99+ elif score_current == score_left + gap_penalty :
100+ align1 += seq1 [i - 1 ]
101+ align2 += '-'
102+ i -= 1
103+ elif score_current == score_up + gap_penalty :
104+ align1 += '-'
105+ align2 += seq2 [j - 1 ]
106+ j -= 1
107+
108+ # Finish tracing up to the top left cell
109+ while i > 0 :
110+ align1 += seq1 [i - 1 ]
111+ align2 += '-'
112+ i -= 1
113+ while j > 0 :
114+ align1 += '-'
115+ align2 += seq2 [j - 1 ]
116+ j -= 1
117+
118+ finalize (align1 , align2 )
119+
120+ def water (seq1 , seq2 ):
121+ m , n = len (seq1 ), len (seq2 ) # length of two sequences
122+
123+ # Generate DP table and traceback path pointer matrix
124+ score = zeros ((m + 1 , n + 1 )) # the DP table
125+ pointer = zeros ((m + 1 , n + 1 )) # to store the traceback path
126+
127+ max_score = 0 # initial maximum score in DP table
128+ # Calculate DP table and mark pointers
129+ for i in range (1 , m + 1 ):
130+ for j in range (1 , n + 1 ):
131+ score_diagonal = score [i - 1 ][j - 1 ] + match_score (seq1 [i - 1 ], seq2 [j - 1 ])
132+ score_up = score [i ][j - 1 ] + gap_penalty
133+ score_left = score [i - 1 ][j ] + gap_penalty
134+ score [i ][j ] = max (0 ,score_left , score_up , score_diagonal )
135+ if score [i ][j ] == 0 :
136+ pointer [i ][j ] = 0 # 0 means end of the path
137+ if score [i ][j ] == score_left :
138+ pointer [i ][j ] = 1 # 1 means trace up
139+ if score [i ][j ] == score_up :
140+ pointer [i ][j ] = 2 # 2 means trace left
141+ if score [i ][j ] == score_diagonal :
142+ pointer [i ][j ] = 3 # 3 means trace diagonal
143+ if score [i ][j ] >= max_score :
144+ max_i = i
145+ max_j = j
146+ max_score = score [i ][j ];
147+
148+ align1 , align2 = '' , '' # initial sequences
149+
150+ i ,j = max_i ,max_j # indices of path starting point
151+
152+ #traceback, follow pointers
153+ while pointer [i ][j ] != 0 :
154+ if pointer [i ][j ] == 3 :
155+ align1 += seq1 [i - 1 ]
156+ align2 += seq2 [j - 1 ]
157+ i -= 1
158+ j -= 1
159+ elif pointer [i ][j ] == 2 :
160+ align1 += '-'
161+ align2 += seq2 [j - 1 ]
162+ j -= 1
163+ elif pointer [i ][j ] == 1 :
164+ align1 += seq1 [i - 1 ]
165+ align2 += '-'
166+ i -= 1
167+
168+ finalize (align1 , align2 )
0 commit comments