Skip to content

Commit be36a48

Browse files
committed
[Feature] Added basic Rabin-Karp-style substring searching.
1 parent 6b71ab4 commit be36a48

File tree

3 files changed

+63
-1
lines changed

3 files changed

+63
-1
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,4 +77,5 @@ Different algorithmic programs. Grouped by general topic.
7777
* [Letters from Phone](algorithms/string/combinations.py)
7878
* [Longest Common Prefix](algorithms/string/longest_common_prefix.py)
7979
* [Trie](algorithms/string/longest_common_prefix.py)
80-
* [Reduce](algorithms/string/longest_common_prefix.py)
80+
* [Reduce](algorithms/string/longest_common_prefix.py)
81+
* [Finding Substring](algorithms/string/rabin_karp_substring_search.py)
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
def rabin_karp_find_substring(string, substring, base=256, modulus=(1 << 61) - 1):
    """
    Find the first occurrence of a substring using Rabin-Karp rolling hashes.

    The hash of the current window of ``string`` is compared with the hash
    of ``substring``; only when the two agree are the characters compared
    directly, so hash collisions can never produce a wrong answer.

    Args:
        string: the string that is being looked in
        substring: the string to search for
        base: integer radix of the polynomial hash. Any integer works
            (even 0), it only affects how often hashes collide.
        modulus: positive integer the hashes are reduced by, which keeps
            them small regardless of the substring's length.

    Returns:
        Index of the beginning of the first occurrence of substring
        within string, or -1 if there is none (which includes the case
        of substring being longer than string). An empty substring is
        found at index 0.
    """
    window = len(substring)
    last_start = len(string) - window
    # A pattern longer than the text cannot occur. Without this guard the
    # initial hashing would have to read past the end of the text.
    if last_start < 0:
        return -1

    # Hash the pattern and the first window of the text (Horner's rule).
    # zip() stops after len(substring) characters, i.e. exactly one window.
    substring_hash = 0
    rolling_hash = 0
    for pattern_char, text_char in zip(substring, string):
        substring_hash = (substring_hash * base + ord(pattern_char)) % modulus
        rolling_hash = (rolling_hash * base + ord(text_char)) % modulus

    # Weight carried by the window's leading character. It is the same for
    # every slide, so compute it once instead of once per position.
    high_weight = pow(base, window - 1, modulus) if window else 0

    for start in range(last_start + 1):
        # Equal hashes only suggest a match; confirm on the actual text.
        if rolling_hash == substring_hash and string[start:start + window] == substring:
            return start
        # Slide the window: drop the leading character, append the next one.
        if start < last_start:
            rolling_hash -= ord(string[start]) * high_weight
            rolling_hash = (rolling_hash * base + ord(string[start + window])) % modulus
    return -1
36+
37+
def rolling_hash_ord(previous_hash, base, length, removal_element=None, additional_element=None):
    """
    Update a polynomial hash with the ordinal of a single character.

    Two modes, selected by which elements are supplied:

    * only ``additional_element``: its ordinal, weighted by
      ``base ** length``, is added to ``previous_hash``. Here ``length``
      is the exponent (position weight) given to the new character.
    * both ``removal_element`` and ``additional_element``: the removed
      character's contribution (its ordinal times ``base ** length``) is
      subtracted, the remainder is multiplied by ``base`` and the new
      character's ordinal is added. Here ``length`` is the exponent the
      removed character was weighted with.

    Args:
        previous_hash: the hash value to update
        base: radix of the polynomial hash
        length: exponent applied to ``base`` (see the modes above)
        removal_element: character leaving the window, if any
        additional_element: character to fold into the hash, if any

    Returns:
        The updated hash. ``previous_hash`` is returned untouched when no
        (truthy) ``additional_element`` is given.
    """
    # Nothing to fold in: a removal on its own leaves the hash as it was.
    if not additional_element:
        return previous_hash

    if removal_element:
        # Strip the outgoing character, shift the rest up one position,
        # then place the incoming character in the lowest position.
        remainder = previous_hash - ord(removal_element) * (base ** length)
        return remainder * base + ord(additional_element)

    # Plain accumulation of one weighted character.
    return previous_hash + ord(additional_element) * (base ** length)

tests/test_rabin_karp.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import unittest
2+
from algorithms.string.rabin_karp_substring_search import rabin_karp_find_substring
3+
4+
class TestRabinKarp(unittest.TestCase):
    """Checks for rabin_karp_find_substring."""

    def test_rabin_karp(self):
        """A substring that is present is reported at its starting index."""
        cases = (
            (2, 'hello', 'llo'),
            (5, 'aabbababaa', 'babaa'),
        )
        for expected, haystack, needle in cases:
            self.assertEqual(expected, rabin_karp_find_substring(haystack, needle))

    def test_rabin_karp_miss(self):
        """A substring that is absent yields -1."""
        found = rabin_karp_find_substring('hello', 'af')
        self.assertEqual(-1, found)

    def test_find_first_with_mult(self):
        """With several occurrences, the first one is reported."""
        found = rabin_karp_find_substring('abcabbabcabb', 'abb')
        self.assertEqual(3, found)

    def test_same_hash_diff_order(self):
        """Windows whose hashes match but whose text differs are skipped."""
        # base=0 is passed to provoke equal hashes for different windows.
        found = rabin_karp_find_substring('Where are your ears?', 'ear', base=0)
        self.assertEqual(15, found)

0 commit comments

Comments
 (0)