Skip to content

Commit a984e16

Browse files
committed
[Feature] Added a more textbook Rabin Karp with prime limitations.
1 parent be36a48 commit a984e16

File tree

2 files changed

+23
-16
lines changed

2 files changed

+23
-16
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,4 +78,5 @@ Different algorithmic programs. Grouped by general topic.
7878
* [Longest Common Prefix](algorithms/string/longest_common_prefix.py)
7979
* [Trie](algorithms/string/longest_common_prefix.py)
8080
* [Reduce](algorithms/string/longest_common_prefix.py)
81-
* [Finding Substring](algorithms/string/rabin_karp_substring_search.py)
81+
* [Finding Substring](algorithms/string/rabin_karp_substring_search.py)
82+
* [Rabin-Karp](algorithms/string/rabin_karp_substring_search.py)
Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,19 @@
1-
def rabin_karp_find_substring(string, substring, base=256):
1+
def rabin_karp_find_substring(string, substring, base=256, prime_modulus=487):
22
"""
33
Finds occurrences of a substring in a string.
44
5+
This uses the Rabin-Karp rolling hash to calculate a hash value for
6+
each window of characters in the string. Because the hash is rolling,
7+
moving to the next window only requires dropping the character that
8+
leaves the window and adding the one that enters it. When the hashes
9+
are equal there is a candidate match, and the strings must still be
10+
compared letter by letter in case of a hash collision.
11+
512
Args:
613
string: the string that is being looked in
714
substring: the string to search for
15+
base: the base used to calculate hashes
16+
prime_modulus: positive prime number used to bound the hash results
817
918
Returns:
1019
Index of the beginning of the first occurrence
@@ -14,12 +23,13 @@ def rabin_karp_find_substring(string, substring, base=256):
1423
# substring hash
1524
substring_hash = 0
1625
rolling_hash = 0
26+
base_n = pow(base,len(substring)-1)%prime_modulus
1727

1828
# get the initial hashes
1929
for i in range(len(substring)):
20-
rolling_hash = rolling_hash_ord(rolling_hash, base, len(substring) - i - 1, additional_element=string[i])
21-
substring_hash = rolling_hash_ord(substring_hash, base, len(substring) - i - 1, additional_element=substring[i])
22-
30+
rolling_hash = (base * rolling_hash + ord(string[i]))%prime_modulus
31+
substring_hash = (base * substring_hash + ord(substring[i]))%prime_modulus
32+
2333
for i in range(len(string) - len(substring)+1):
2434
# check if hash matches hash of substring
2535
if rolling_hash == substring_hash:
@@ -30,15 +40,11 @@ def rabin_karp_find_substring(string, substring, base=256):
3040
else:
3141
return i
3242
# recalculate hash
33-
if i+len(substring) <= len(string) - 1:
34-
rolling_hash = rolling_hash_ord(rolling_hash, base, len(substring)-1, removal_element=string[i], additional_element=string[i+len(substring)])
35-
return -1
43+
if i < len(string) - len(substring):
44+
# remove the ith number and add the i+len(substring)th number
45+
rolling_hash = ((rolling_hash - (base_n * ord(string[i]))) * base) + ord(string[i + len(substring)])%prime_modulus
46+
47+
# make sure rolling_hash >= 0
48+
rolling_hash = (rolling_hash + prime_modulus) % prime_modulus
3649

37-
def rolling_hash_ord(previous_hash, base, length, removal_element=None, additional_element=None):
38-
if removal_element and additional_element:
39-
previous_hash -= ord(removal_element) * (base ** length)
40-
previous_hash *= base
41-
previous_hash += ord(additional_element)
42-
elif additional_element:
43-
previous_hash += ord(additional_element) * (base ** length)
44-
return previous_hash
50+
return -1

0 commit comments

Comments
 (0)