1
def rabin_karp_find_substring(string, substring, base=256, prime_modulus=487):
    """
    Find the first occurrence of a substring in a string.

    This uses the Rabin-Karp rolling hash to calculate a hash value for
    each window of ``len(substring)`` letters in the string. Because the
    hash is rolling, moving to the next window only requires dropping the
    letter that leaves the window and adding the letter that enters it.
    When the window hash equals the substring hash there is a candidate
    match, and the letters must be compared one by one in case of a hash
    collision.

    Args:
        string: the string that is being looked in
        substring: the string to search for
        base: the base used to calculate hashes
        prime_modulus: positive prime number used to bound the hash results

    Returns:
        Index of the beginning of the first occurrence, or -1 if the
        substring does not occur in the string. An empty substring is
        found at index 0.
    """
    window = len(substring)

    # An empty substring trivially matches at the start of the string.
    if window == 0:
        return 0
    # A substring longer than the string can never match; bail out before
    # the initial hashing loop would index past the end of the string.
    if window > len(string):
        return -1

    # Weight of the leading letter of a window: base^(window - 1) mod p.
    # Three-argument pow keeps the intermediate values small.
    base_n = pow(base, window - 1, prime_modulus)

    # Hash the substring and the first window of the string.
    substring_hash = 0
    rolling_hash = 0
    for i in range(window):
        rolling_hash = (base * rolling_hash + ord(string[i])) % prime_modulus
        substring_hash = (base * substring_hash + ord(substring[i])) % prime_modulus

    last_start = len(string) - window
    for i in range(last_start + 1):
        # Equal hashes only mean a candidate match; confirm letter by letter
        # to rule out a hash collision.
        if rolling_hash == substring_hash and all(
            string[i + j] == substring[j] for j in range(window)
        ):
            return i

        # Roll the hash: remove the ith letter and add the (i + window)th.
        # Python's % returns a non-negative result for a positive modulus,
        # so no extra correction is needed after the subtraction.
        if i < last_start:
            rolling_hash = (
                (rolling_hash - base_n * ord(string[i])) * base
                + ord(string[i + window])
            ) % prime_modulus

    return -1
0 commit comments