Skip to content

Commit 14ba7ff

Browse files
committed
[Feature] Added finding edit distance between 2 strings.
1 parent 2ab7fcb commit 14ba7ff

File tree

3 files changed

+99
-1
lines changed

3 files changed

+99
-1
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,4 +85,5 @@ Different algorithmic programs. Grouped by general topic.
8585
* [Trie](algorithms/string/longest_common_prefix.py)
8686
* [Reduce](algorithms/string/longest_common_prefix.py)
8787
* [Finding Substring](algorithms/string/rabin_karp_substring_search.py)
88-
* [Rabin-Karp](algorithms/string/rabin_karp_substring_search.py)
88+
* [Rabin-Karp](algorithms/string/rabin_karp_substring_search.py)
89+
* [Calculate Edit Distance](algorithms/string/edit_distance.py)

algorithms/string/edit_distance.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
def calculate_edit_distance(str1, str2):
    '''
    Compute a minimal edit script that turns ``str2`` into ``str1``.

    An edit is defined as one of three actions, a deletion,
    a replacement, or an addition. Every edit costs 1; matching
    characters cost nothing.

    Despite the function name, the return value is the list of edits,
    not a number. The numeric edit distance is ``len(result)``.

    Args:
        str1: the target string.
        str2: the string being edited.

    Returns:
        A list of human-readable steps, in left-to-right order, each one of
        ``'Substitute <c>'`` (replace the current character of ``str2`` with
        ``<c>`` from ``str1``), ``'Insert <c>'`` (add ``<c>`` from ``str1``)
        or ``'Delete <c>'`` (remove ``<c>`` from ``str2``). Positions where
        the two strings already agree produce no entry, so identical
        strings yield ``[]``.
    '''

    # operation enums; their numeric order is also the tie-break priority,
    # because ``list.index`` returns the first position holding the minimum
    MATCH, INSERT, DELETE = 0, 1, 2

    # one extra row/column represents the empty prefix of each string
    rows = len(str2) + 1
    cols = len(str1) + 1

    # 2D array to hold all edit distance data
    distance = [[0] * cols for _ in range(rows)]
    # 2D array to hold parent least cost relationships
    parent = [[0] * cols for _ in range(rows)]

    # pad the strings so that string index k lines up with table index k
    str1 = " " + str1
    str2 = " " + str2

    # initial values: reaching an empty str1 prefix takes only deletions,
    # building a str1 prefix from an empty str2 prefix takes only insertions
    for i in range(rows):
        distance[i][0] = i
        parent[i][0] = DELETE

    for j in range(cols):
        distance[0][j] = j
        parent[0][j] = INSERT

    # the origin has no predecessor; -1 terminates the traceback below
    parent[0][0] = -1

    # go through every letter combination
    for i in range(1, rows):
        for j in range(1, cols):
            opt = [0, 0, 0]
            # match or substitution
            opt[MATCH] = distance[i-1][j-1] + (0 if str1[j] == str2[i] else 1)
            opt[INSERT] = distance[i][j-1] + 1  # indel
            opt[DELETE] = distance[i-1][j] + 1  # indel

            # find min cost operation
            lowest_cost = min(opt)
            distance[i][j] = lowest_cost
            parent[i][j] = opt.index(lowest_cost)

    # traceback: walk from the bottom-right cell back to the origin,
    # collecting the edits in reverse order
    i, j = rows - 1, cols - 1
    trace_stack = []
    while parent[i][j] != -1:
        parent_val = parent[i][j]
        if parent_val == MATCH:
            # a diagonal step is only an edit when the characters differ
            if str2[i] != str1[j]:
                trace_stack.append('Substitute' + ' ' + str1[j])
            i, j = i - 1, j - 1
        elif parent_val == INSERT:
            trace_stack.append('Insert' + ' ' + str1[j])
            j -= 1
        else:
            trace_stack.append('Delete' + ' ' + str2[i])
            i -= 1

    return trace_stack[::-1]
88+

tests/test_edit_distance.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import unittest
2+
from algorithms.string.edit_distance import calculate_edit_distance
3+
4+
class TestEditDistance(unittest.TestCase):
    '''Unit tests for the edit script returned by calculate_edit_distance.'''

    def test_edit_distance(self):
        '''Editing 'teio' into 'hello' takes two substitutions and one insertion.'''
        expected_steps = ['Substitute h', 'Insert l', 'Substitute l']
        actual_steps = calculate_edit_distance('hello', 'teio')

        self.assertEqual(expected_steps, actual_steps)
9+

0 commit comments

Comments
 (0)