Skip to content

Commit

Permalink
remove find matches functions
Browse files Browse the repository at this point in the history
  • Loading branch information
CharliesCodes committed May 24, 2022
1 parent 1088db0 commit 370b43f
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 111 deletions.
31 changes: 6 additions & 25 deletions algorithms/free_shift_alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,28 +23,7 @@
GAP = -10


def fill_matches(score_matrix, ls1, ls2, seq1, seq2):
    """Write per-cell substitution scores into the matrix.

    Every cell from the second row and second column onwards receives
    MATCH when the characters of both sequences at that position are
    equal, and MISMATCH otherwise. Row 0 and column 0 are not touched.

    Args:
        score_matrix (np matrix): initialized matrix, indexed [y][x]
        ls1 (int): length of the first sequence (exclusive x bound)
        ls2 (int): length of the second sequence (exclusive y bound)
        seq1 (str): first sequence to align, indexed by x
        seq2 (str): second sequence to align, indexed by y

    Returns:
        numpy matrix: the same matrix object, filled in place
    """
    for row in range(1, ls2):
        for col in range(1, ls1):
            if seq1[col] == seq2[row]:
                cell_score = MATCH
            else:
                cell_score = MISMATCH
            score_matrix[row][col] = cell_score
    return score_matrix


def recalculate_scorematrix(score_matrix, ls1, ls2):
def calculate_scorematrix(score_matrix, ls1, ls2, seq1, seq2):
"""creates a scorematrix with help of the
substitution matrix and rewards/ penalties
Expand All @@ -53,15 +32,18 @@ def recalculate_scorematrix(score_matrix, ls1, ls2):
initialized matrix
ls1 (int): len first sequence to align
ls2 (int): len second sequence to align
seq1 (str): first sequence to align
seq2 (str): second sequence to align
Returns:
numpy matrix: scorematrix
"""
# start at second row and column -> skips initialized cells
for y in range(1, ls2):
for x in range(1, ls1):
match_value = MATCH if seq1[x] == seq2[y] else MISMATCH
score_matrix[y][x] = max(
score_matrix[y-1][x-1] + score_matrix[y][x],
score_matrix[y-1][x-1] + match_value,
score_matrix[y-1][x] + GAP,
score_matrix[y][x-1] + GAP
)
Expand Down Expand Up @@ -239,8 +221,7 @@ def main(seq1='', seq2='', assembly=False):
ls1 = len(seq1)
ls2 = len(seq2)
score_matrix = np.zeros((ls2, ls1), dtype="int16")
score_matrix = fill_matches(score_matrix, ls1, ls2, seq1, seq2)
score_matrix = recalculate_scorematrix(score_matrix, ls1, ls2)
score_matrix = calculate_scorematrix(score_matrix, ls1, ls2, seq1, seq2)
max_border_coords = get_max_from_border(score_matrix)
seq1_new, seq2_new = traceback(
score_matrix, seq1, seq2, max_border_coords)
Expand Down
43 changes: 10 additions & 33 deletions algorithms/multiple_sequence_alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,34 +25,6 @@
GAP = -2


def fill_matches(score_matrix, ls1, ls2, ls3, seq1, seq2, seq3):
    """Write per-cell substitution scores into the 3D matrix.

    Every cell, including index 0 on each axis, is visited. A cell
    receives MATCH as soon as any two of the three characters at that
    position are equal, and MISMATCH only if all three differ pairwise.

    Args:
        score_matrix (np matrix): initialized matrix, indexed [z][y][x]
        ls1 (int): length of the first sequence (exclusive x bound)
        ls2 (int): length of the second sequence (exclusive y bound)
        ls3 (int): length of the third sequence (exclusive z bound)
        seq1 (str): first sequence to align, indexed by x
        seq2 (str): second sequence to align, indexed by y
        seq3 (str): third sequence to align, indexed by z

    Returns:
        numpy matrix: the same matrix object, filled in place
    """
    for depth in range(ls3):
        for row in range(ls2):
            for col in range(ls1):
                # Same comparison order as before, so short-circuiting
                # evaluates exactly the same expressions.
                any_pair_equal = (
                    seq1[col] == seq2[row]
                    or seq1[col] == seq3[depth]
                    or seq2[row] == seq3[depth]
                )
                score_matrix[depth][row][col] = (
                    MATCH if any_pair_equal else MISMATCH
                )
    return score_matrix


def initialize_matrix(score_matrix, ls1, ls2, ls3):
"""fills first row and column with
given penalties
Expand All @@ -76,7 +48,7 @@ def initialize_matrix(score_matrix, ls1, ls2, ls3):
return score_matrix


def recalculate_scorematrix(score_matrix, ls1, ls2, ls3):
def calculate_scorematrix(score_matrix, ls1, ls2, ls3, seq1, seq2, seq3):
"""creates a scorematrix with help of the
substitution matrix and rewards/ penalties
Expand All @@ -95,6 +67,11 @@ def recalculate_scorematrix(score_matrix, ls1, ls2, ls3):
for y in range(ls2):
for x in range(ls1):
coords = (x,y,z)
if ((seq1[x] == seq2[y]) or (seq1[x] == seq3[z]) or (seq2[y] == seq3[z])):
match_value = MATCH
else:
match_value = MISMATCH

# surfaces
if coords.count(0) == 1:
# front surface
Expand Down Expand Up @@ -301,17 +278,17 @@ def calc_similarity(alignment_output):

def main():
# change sequences below for your needs!
seq1 = ",AA,"
seq2 = ",AT,"
seq1 = ",AT,"
seq2 = ",AA,"
seq3 = ",AA,"

ls1 = len(seq1)
ls2 = len(seq2)
ls3 = len(seq3)
score_matrix = np.zeros((ls3, ls2, ls1), dtype="int16")
score_matrix = fill_matches(score_matrix, ls1, ls2, ls3, seq1, seq2, seq3)
score_matrix = initialize_matrix(score_matrix, ls1, ls2, ls3)
score_matrix = recalculate_scorematrix(score_matrix, ls1, ls2, ls3)
score_matrix = calculate_scorematrix(
score_matrix, ls1, ls2, ls3, seq1, seq2, seq3)
seq1_new, seq2_new, seq3_new = traceback(score_matrix, ls1, ls2, ls3, seq1, seq2, seq3)
alignment1_2, alignment1_3, alignment2_3 = output(
seq1_new, seq2_new, seq3_new)
Expand Down
32 changes: 6 additions & 26 deletions algorithms/needleman_wunsch.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,29 +44,7 @@ def initialize_matrix(score_matrix, ls1, ls2):
return score_matrix


def fill_matches(score_matrix, ls1, ls2, seq1, seq2):
    """Store substitution scores inside the initialized matrix.

    Only cells from the second row and second column onwards are
    written, so whatever row 0 and column 0 already hold is left as is.
    A cell gets MATCH when both sequences carry the same character at
    that position, and MISMATCH otherwise.

    Args:
        score_matrix (np matrix): initialized matrix, indexed [y][x]
        ls1 (int): length of the first sequence (exclusive x bound)
        ls2 (int): length of the second sequence (exclusive y bound)
        seq1 (str): first sequence to align, indexed by x
        seq2 (str): second sequence to align, indexed by y

    Returns:
        numpy matrix: the same matrix object, filled in place
    """
    for row in range(1, ls2):
        for col in range(1, ls1):
            if seq1[col] == seq2[row]:
                cell_score = MATCH
            else:
                cell_score = MISMATCH
            score_matrix[row][col] = cell_score
    return score_matrix


def recalculate_scorematrix(score_matrix, ls1, ls2):
def calculate_scorematrix(score_matrix, ls1, ls2, seq1, seq2):
"""creates a scorematrix with help of the
substitution matrix and rewards/ penalties
Expand All @@ -75,15 +53,18 @@ def recalculate_scorematrix(score_matrix, ls1, ls2):
initialized matrix
ls1 (int): len first sequence to align
ls2 (int): len second sequence to align
seq1 (str): first sequence to align
seq2 (str): second sequence to align
Returns:
numpy matrix: scorematrix
"""
# start at second row and column -> skips initialized cells
for y in range(1, ls2):
for x in range(1, ls1):
match_value = MATCH if seq1[x] == seq2[y] else MISMATCH
score_matrix[y][x] = max(
score_matrix[y-1][x-1] + score_matrix[y][x],
score_matrix[y-1][x-1] + match_value,
score_matrix[y-1][x] + GAP,
score_matrix[y][x-1] + GAP
)
Expand Down Expand Up @@ -195,8 +176,7 @@ def main(seq1='', seq2=''):
ls2 = len(seq2)
score_matrix = np.zeros((ls2, ls1), dtype="int16")
score_matrix = initialize_matrix(score_matrix, ls1, ls2)
score_matrix = fill_matches(score_matrix, ls1, ls2, seq1, seq2)
score_matrix = recalculate_scorematrix(score_matrix, ls1, ls2)
score_matrix = calculate_scorematrix(score_matrix, ls1, ls2, seq1, seq2)
seq1_new, seq2_new = traceback(score_matrix, ls1, ls2, seq1, seq2)
alignment_output = output(seq1_new, seq2_new)
sim_tup = calc_similarity(alignment_output)
Expand Down
35 changes: 8 additions & 27 deletions algorithms/smith_waterman.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,29 +24,7 @@
GAP = -2


def fill_matches(score_matrix, ls1, ls2, seq1, seq2):
    """Store substitution scores inside the initialized matrix.

    Cells in row 0 and column 0 are skipped. Every other cell is set
    to MATCH when the two sequences have the same character at that
    position, and to MISMATCH otherwise.

    Args:
        score_matrix (np matrix): initialized matrix, indexed [y][x]
        ls1 (int): length of the first sequence (exclusive x bound)
        ls2 (int): length of the second sequence (exclusive y bound)
        seq1 (str): first sequence to align, indexed by x
        seq2 (str): second sequence to align, indexed by y

    Returns:
        numpy matrix: the same matrix object, filled in place
    """
    for row in range(1, ls2):
        for col in range(1, ls1):
            if seq1[col] == seq2[row]:
                cell_score = MATCH
            else:
                cell_score = MISMATCH
            score_matrix[row][col] = cell_score
    return score_matrix


def recalculate_scorematrix(score_matrix, ls1, ls2):
def calculate_scorematrix(score_matrix, ls1, ls2, seq1, seq2):
"""creates a scorematrix with help of the
substitution matrix and rewards/ penalties
Expand All @@ -55,18 +33,22 @@ def recalculate_scorematrix(score_matrix, ls1, ls2):
initialized matrix
ls1 (int): len first sequence to align
ls2 (int): len second sequence to align
seq1 (str): first sequence to align
seq2 (str): second sequence to align
Returns:
numpy matrix: scorematrix
"""
# start at second row and column -> skips initialized cells
for y in range(1, ls2):
for x in range(1, ls1):
match_value = MATCH if seq1[x] == seq2[y] else MISMATCH
score_matrix[y][x] = max(
0,
score_matrix[y-1][x-1] + score_matrix[y][x],
score_matrix[y-1][x-1] + match_value,
score_matrix[y-1][x] + GAP,
score_matrix[y][x-1] + GAP
)
)
return score_matrix


Expand Down Expand Up @@ -180,8 +162,7 @@ def main(seq1='', seq2=''):
ls1 = len(seq1)
ls2 = len(seq2)
score_matrix = np.zeros((ls2, ls1), dtype="int16")
score_matrix = fill_matches(score_matrix, ls1, ls2, seq1, seq2)
score_matrix = recalculate_scorematrix(score_matrix, ls1, ls2)
score_matrix = calculate_scorematrix(score_matrix, ls1, ls2, seq1, seq2)
max_coords = find_max_coordinates(score_matrix)
seq1_new, seq2_new = traceback(score_matrix, seq1, seq2, max_coords)
alignment_output = output(seq1_new, seq2_new)
Expand Down

0 comments on commit 370b43f

Please sign in to comment.