1
def calculate_edit_distance(str1, str2):
    """Return the minimum-cost edit script that turns ``str2`` into ``str1``.

    Despite the name, the value returned is the list of edits rather than
    their count; the edit distance itself is ``len()`` of the result.

    An edit is one of three unit-cost actions:

    * ``'Insert c'``     -- add character ``c`` (taken from ``str1``),
    * ``'Delete c'``     -- remove character ``c`` (taken from ``str2``),
    * ``'Substitute c'`` -- replace a character of ``str2`` with ``c``
      (taken from ``str1``).

    Positions where the two strings already agree produce no entry.  When
    several operations are equally cheap the choice is, in order of
    preference: match/substitute, insert, delete.

    Args:
        str1: The target string.
        str2: The string being edited.

    Returns:
        A list of operation strings, ordered from the start of the strings
        to the end.  Empty when the strings are equal.
    """
    # Operation codes double as indices into the per-cell cost list, so
    # their numeric order is also the tie-breaking order used below.
    MATCH, INSERT, DELETE = 0, 1, 2
    # Sentinel stored in the origin cell; reaching it ends the traceback.
    START = -1

    rows = len(str2) + 1
    cols = len(str1) + 1

    # distance[i][j]: cheapest cost of turning str2[:i] into str1[:j].
    distance = [[0] * cols for _ in range(rows)]
    # parent[i][j]: the operation that produced distance[i][j].
    parent = [[START] * cols for _ in range(rows)]

    # First column: the only way to reach an empty target is to delete.
    for i in range(1, rows):
        distance[i][0] = i
        parent[i][0] = DELETE

    # First row: the only way to grow from an empty source is to insert.
    for j in range(1, cols):
        distance[0][j] = j
        parent[0][j] = INSERT

    # Fill the table; cell (i, j) looks at str2[i - 1] and str1[j - 1].
    for i, src_char in enumerate(str2, start=1):
        for j, dst_char in enumerate(str1, start=1):
            costs = [0, 0, 0]
            costs[MATCH] = distance[i - 1][j - 1] + (
                0 if dst_char == src_char else 1
            )
            costs[INSERT] = distance[i][j - 1] + 1
            costs[DELETE] = distance[i - 1][j] + 1

            best = min(costs)
            distance[i][j] = best
            # list.index returns the first minimum, which gives the
            # match > insert > delete preference on ties.
            parent[i][j] = costs.index(best)

    # Walk the parent pointers back from the bottom-right corner.
    i, j = len(str2), len(str1)
    edits = []
    while parent[i][j] != START:
        operation = parent[i][j]
        if operation == MATCH:
            # A diagonal step is only an edit when the characters differ.
            if str2[i - 1] != str1[j - 1]:
                edits.append('Substitute' + ' ' + str1[j - 1])
            i -= 1
            j -= 1
        elif operation == INSERT:
            edits.append('Insert' + ' ' + str1[j - 1])
            j -= 1
        else:
            edits.append('Delete' + ' ' + str2[i - 1])
            i -= 1

    # The traceback collected edits end-to-start; report them start-to-end.
    edits.reverse()
    return edits
88
+
0 commit comments