codezonediitj · czgdp1807 · Oct 12, 2021 · Oct 12, 2021 · Oct 12, 2021 · Oct 12, 2021
diff --git a/pydatastructs/strings/algorithms.py b/pydatastructs/strings/algorithms.py
@@ -5,6 +5,8 @@
     'find'
 ]
 
+# Base (PRIME_NUMBER) and modulus (MOD) of the polynomial hash used by the
+# Rabin-Karp helpers in this module; they are the default ``p`` and ``m``
+# arguments of ``_p_pow`` and ``_hash_str``.
+PRIME_NUMBER, MOD = 257, 1000000007
+
 def find(text, query, algorithm):
     """
     Finds occurrence of a query string within the text string.
@@ -22,6 +24,7 @@ def find(text, query, algorithm):
         Currently the following algorithms are
         supported,
         'kmp' -> Knuth-Morris-Pratt as given in [1].
+        'rabin_karp' -> Rabin–Karp algorithm as given in [2].
 
     Returns
     =======
@@ -52,6 +55,7 @@ def find(text, query, algorithm):
     ==========
 
     .. [1] https://en.wikipedia.org/wiki/Knuth–Morris–Pratt_algorithm
+    .. [2] https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm
     """
     import pydatastructs.strings.algorithms as algorithms
     func = "_" + algorithm
@@ -64,6 +68,8 @@ def find(text, query, algorithm):
 
 
 def _knuth_morris_pratt(text, query):
+    """
+    Matches ``query`` against ``text`` with the Knuth-Morris-Pratt
+    algorithm.
+
+    Returns an empty ``DynamicOneDimensionalArray`` when either string
+    is empty. Otherwise builds the KMP table of ``query`` and returns
+    the result of ``_do_match``.
+    """
+    # Nothing can be matched with an empty text or an empty query, so the
+    # table construction is skipped altogether.
+    if len(text) == 0 or len(query) == 0:
+        return DynamicOneDimensionalArray(int, 0)
     kmp_table = _build_kmp_table(query)
     return _do_match(text, query, kmp_table)
 
@@ -107,3 +113,40 @@ def _do_match(string, query, kmp_table):
                 k = k + 1
 
     return positions
+
+def _p_pow(length, p=PRIME_NUMBER, m=MOD):
+    """
+    Builds the table of successive powers of ``p`` reduced modulo ``m``.
+
+    Parameters
+    ==========
+
+    length: int
+        Number of entries of the table.
+    p: int
+        Base whose powers are computed.
+    m: int
+        Modulus applied after every multiplication.
+
+    Returns
+    =======
+
+    powers: OneDimensionalArray
+        Array of ``length`` integers in which ``powers[0]`` is ``1``
+        and ``powers[i]`` is ``(powers[i-1] * p) % m``.
+    """
+    powers = OneDimensionalArray(int, length)
+    current = 1
+    powers[0] = current
+    for exponent in range(1, length):
+        # Reduce at every step to keep the intermediate values small.
+        current = (current * p) % m
+        powers[exponent] = current
+    return powers
+
+def _hash_str(string, p=PRIME_NUMBER, m=MOD):
+    """
+    Computes the polynomial hash of a string.
+
+    The hash is ``sum(ord(string[i]) * p**i) % m`` taken over all the
+    indices ``i`` of ``string``.
+
+    Parameters
+    ==========
+
+    string: str
+        The string to be hashed.
+    p: int
+        Base of the polynomial.
+    m: int
+        Modulus of the hash.
+
+    Returns
+    =======
+
+    hash_value: int
+        The hash of ``string``; ``0`` for the empty string.
+    """
+    hash_value = 0
+    # Carry ``p**i % m`` along instead of building a table of powers:
+    # no temporary array is allocated and the empty string simply skips
+    # the loop instead of requesting a zero-length table.
+    power = 1
+    for char in string:
+        hash_value = (hash_value + ord(char) * power) % m
+        power = (power * p) % m
+    return hash_value
+
+def _rabin_karp(text, query):
+    """
+    Finds the occurrences of ``query`` in ``text`` by comparing the
+    polynomial hash of every window of ``text`` with the hash of
+    ``query``, and confirming each hash match character by character.
+
+    Parameters
+    ==========
+
+    text: str
+        The string in which ``query`` is searched.
+    query: str
+        The string to be searched for.
+
+    Returns
+    =======
+
+    positions: DynamicOneDimensionalArray
+        Starting indices, in increasing order, of the windows of
+        ``text`` that are equal to ``query``. Empty when either string
+        is empty or when ``query`` is longer than ``text``.
+    """
+    t = len(text)
+    q = len(query)
+    positions = DynamicOneDimensionalArray(int, 0)
+    # A query longer than the text cannot occur in it, so the hashing
+    # work is skipped in that case too.
+    if q == 0 or t == 0 or q > t:
+        return positions
+
+    query_hash = _hash_str(query)
+    # text_hash[i] is the hash of the prefix text[:i].
+    text_hash = OneDimensionalArray(int, t + 1)
+    text_hash.fill(0)
+    p_pow = _p_pow(t)
+
+    for i in range(t):
+        text_hash[i+1] = (text_hash[i] + ord(text[i]) * p_pow[i]) % MOD
+    for i in range(t - q + 1):
+        # Hash of the window text[i:i+q], still scaled by p**i. MOD is
+        # added before subtracting to keep the difference non-negative.
+        curr_hash = (text_hash[i + q] + MOD - text_hash[i]) % MOD
+        # The query hash is scaled by the same p**i so that no modular
+        # division of the window hash is needed.
+        hashes_match = curr_hash == (query_hash * p_pow[i]) % MOD
+        # Equal hashes do not imply equal strings (for instance "\u0162a"
+        # and "ab" both hash to 25283), so every candidate is verified
+        # against the actual characters before being reported.
+        if hashes_match and text[i:i + q] == query:
+            positions.append(i)
+
+    return positions
+
+
diff --git a/pydatastructs/strings/tests/test_algorithms.py b/pydatastructs/strings/tests/test_algorithms.py
@@ -5,6 +5,8 @@
 def test_kmp():
+    # Runs the shared string matching checks with the 'kmp' algorithm.
     _test_common_string_matching('kmp')
 
+def test_rka():
+    """Runs the shared string matching checks with 'rabin_karp'."""
+    _test_common_string_matching(algorithm='rabin_karp')
 
 def _test_common_string_matching(algorithm):
     true_text_pattern_dictionary = {
@@ -26,7 +28,9 @@ def _test_common_string_matching(algorithm):
         "Knuth-Morris-Pratt": "-Pratt-",
         "abcabcabcabdabcabdabcabca": "qwertyuiopzxcvbnm",
         "aefcdfaecdaefaefcdaefeaefcdcdeae": "cdaefaefe",
-        "fullstringmatch": "fullstrinmatch"
+        "fullstringmatch": "fullstrinmatch",
+        "abc": "",
+        "": "abc"
     }
 
     for test_case_key in false_text_pattern_dictionary:
@@ -52,13 +56,13 @@ def gen_random_string(length):
             if rand_str != query:
                 freq += 1
                 text += query + rand_str + query
-        positions = find(text, query, algorithm="kmp")
+        positions = find(text, query, algorithm)
         assert positions._num == num_times * 2
         for i in range(positions._last_pos_filled):
             p = positions[i]
             assert text[p:p + len(query)] == query
 
         text = gen_random_string(len(query))
         if text != query:
-            positions = find(text, query, algorithm="kmp")
+            positions = find(text, query, algorithm)
             assert positions.size == 0