Skip to content

Commit 015f306

Browse files
committed
Implemented the Needleman-Wunsch algorithm
1 parent b4d870a commit 015f306

File tree

4 files changed

+128
-53
lines changed

4 files changed

+128
-53
lines changed

pairwise_alignment.py renamed to alignment.py

Lines changed: 105 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -26,57 +26,7 @@ def match_score(alpha, beta):
2626
else:
2727
return mismatch_penalty
2828

29-
def water(seq1, seq2):
30-
m, n = len(seq1), len(seq2) # length of two sequences
31-
32-
# Generate DP table and traceback path pointer matrix
33-
score = zeros((m+1, n+1)) # the DP table
34-
pointer = zeros((m+1, n+1)) # to store the traceback path
35-
36-
P = 0
37-
max_score = P # initial maximum score in DP table
38-
39-
# Calculate DP table and mark pointers
40-
for i in range(1, m + 1):
41-
for j in range(1, n + 1):
42-
score_up = score[i-1][j] + gap_penalty
43-
score_down = score[i][j-1] + gap_penalty
44-
score_diagonal = score[i-1][j-1] + match_score(seq1[i-1], seq2[j-1])
45-
score[i][j] = max(0,score_up, score_down, score_diagonal)
46-
if score[i][j] == 0:
47-
pointer[i][j] = 0 # 0 means end of the path
48-
if score[i][j] == score_up:
49-
pointer[i][j] = 1 # 1 means trace up
50-
if score[i][j] == score_down:
51-
pointer[i][j] = 2 # 2 means trace left
52-
if score[i][j] == score_diagonal:
53-
pointer[i][j] = 3 # 3 means trace diagonal
54-
if score[i][j] >= max_score:
55-
max_i = i
56-
max_j = j
57-
max_score = score[i][j];
58-
59-
60-
align1, align2 = '', '' # initial sequences
61-
62-
i,j = max_i,max_j # indices of path starting point
63-
64-
#traceback, follow pointers
65-
while pointer[i][j] != 0:
66-
if pointer[i][j] == 3:
67-
align1 = align1 + seq1[i-1]
68-
align2 = align2 + seq2[j-1]
69-
i = i - 1
70-
j = j - 1
71-
elif pointer[i][j] == 2:
72-
align1 = align1 + '-'
73-
align2 = align2 + seq2[j-1]
74-
j = j - 1
75-
elif pointer[i][j] == 1:
76-
align1 = align1 + seq1[i-1]
77-
align2 = align2 + '-'
78-
i = i - 1
79-
29+
def finalize(align1, align2):
8030
align1 = align1[::-1] #reverse sequence 1
8131
align2 = align2[::-1] #reverse sequence 2
8232

@@ -112,3 +62,107 @@ def water(seq1, seq2):
11262
print align1
11363
print symbol
11464
print align2
65+
66+
67+
def needle(seq1, seq2):
68+
m, n = len(seq1), len(seq2) # length of two sequences
69+
70+
# Generate DP table and traceback path pointer matrix
71+
score = zeros((m+1, n+1)) # the DP table
72+
73+
# Calculate DP table
74+
for i in range(0, m + 1):
75+
score[i][0] = gap_penalty * i
76+
for j in range(0, n + 1):
77+
score[0][j] = gap_penalty * j
78+
for i in range(1, m + 1):
79+
for j in range(1, n + 1):
80+
match = score[i - 1][j - 1] + match_score(seq1[i-1], seq2[j-1])
81+
delete = score[i - 1][j] + gap_penalty
82+
insert = score[i][j - 1] + gap_penalty
83+
score[i][j] = max(match, delete, insert)
84+
85+
# Traceback and compute the alignment
86+
align1, align2 = '', ''
87+
i,j = m,n # start from the bottom right cell
88+
while i > 0 and j > 0: # end toching the top or the left edge
89+
score_current = score[i][j]
90+
score_diagonal = score[i-1][j-1]
91+
score_up = score[i][j-1]
92+
score_left = score[i-1][j]
93+
94+
if score_current == score_diagonal + match_score(seq1[i-1], seq2[j-1]):
95+
align1 += seq1[i-1]
96+
align2 += seq2[j-1]
97+
i -= 1
98+
j -= 1
99+
elif score_current == score_left + gap_penalty:
100+
align1 += seq1[i-1]
101+
align2 += '-'
102+
i -= 1
103+
elif score_current == score_up + gap_penalty:
104+
align1 += '-'
105+
align2 += seq2[j-1]
106+
j -= 1
107+
108+
# Finish tracing up to the top left cell
109+
while i > 0:
110+
align1 += seq1[i-1]
111+
align2 += '-'
112+
i -= 1
113+
while j > 0:
114+
align1 += '-'
115+
align2 += seq2[j-1]
116+
j -= 1
117+
118+
finalize(align1, align2)
119+
120+
def water(seq1, seq2):
121+
m, n = len(seq1), len(seq2) # length of two sequences
122+
123+
# Generate DP table and traceback path pointer matrix
124+
score = zeros((m+1, n+1)) # the DP table
125+
pointer = zeros((m+1, n+1)) # to store the traceback path
126+
127+
max_score = 0 # initial maximum score in DP table
128+
# Calculate DP table and mark pointers
129+
for i in range(1, m + 1):
130+
for j in range(1, n + 1):
131+
score_diagonal = score[i-1][j-1] + match_score(seq1[i-1], seq2[j-1])
132+
score_up = score[i][j-1] + gap_penalty
133+
score_left = score[i-1][j] + gap_penalty
134+
score[i][j] = max(0,score_left, score_up, score_diagonal)
135+
if score[i][j] == 0:
136+
pointer[i][j] = 0 # 0 means end of the path
137+
if score[i][j] == score_left:
138+
pointer[i][j] = 1 # 1 means trace up
139+
if score[i][j] == score_up:
140+
pointer[i][j] = 2 # 2 means trace left
141+
if score[i][j] == score_diagonal:
142+
pointer[i][j] = 3 # 3 means trace diagonal
143+
if score[i][j] >= max_score:
144+
max_i = i
145+
max_j = j
146+
max_score = score[i][j];
147+
148+
align1, align2 = '', '' # initial sequences
149+
150+
i,j = max_i,max_j # indices of path starting point
151+
152+
#traceback, follow pointers
153+
while pointer[i][j] != 0:
154+
if pointer[i][j] == 3:
155+
align1 += seq1[i-1]
156+
align2 += seq2[j-1]
157+
i -= 1
158+
j -= 1
159+
elif pointer[i][j] == 2:
160+
align1 += '-'
161+
align2 += seq2[j-1]
162+
j -= 1
163+
elif pointer[i][j] == 1:
164+
align1 += seq1[i-1]
165+
align2 += '-'
166+
i -= 1
167+
168+
finalize(align1, align2)

needle

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#!/usr/bin/env python
2+
3+
import sys, string
4+
import alignment
5+
6+
# Command-line arguments
7+
f1 = open(sys.argv[1], 'r')
8+
seq1 = f1.readline()
9+
seq1 = string.strip(seq1)
10+
11+
f2 = open(sys.argv[2], 'r')
12+
seq2 = f2.readline()
13+
seq2 = string.strip(seq2)
14+
15+
alignment.needle(seq1, seq2)

needle-test

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#!/bin/bash
2+
3+
./needle <(echo ALEXNILDCQGAAA%LIN@%) <(echo PYTHONNILDCQGHHHH%LINJJBB)
4+
echo
5+
6+
./needle <(echo ONNALEXNILDCQGAAA%LIN@%BBSDF) <(echo PYTHONNILDCQGHHHH%LINJJBB)

water

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env python
22

33
import sys, string
4-
import pairwise_alignment
4+
import alignment
55

66
# Command-line arguments
77
f1 = open(sys.argv[1], 'r')
@@ -12,4 +12,4 @@ f2 = open(sys.argv[2], 'r')
1212
seq2 = f2.readline()
1313
seq2 = string.strip(seq2)
1414

15-
pairwise_alignment.water(seq1, seq2)
15+
alignment.water(seq1, seq2)

0 commit comments

Comments
 (0)