Skip to content

Add doctests for the boyer_moore_search algorithm. #12769

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 2, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 33 additions & 16 deletions strings/boyer_moore_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,30 +11,42 @@
a shift is proposed that moves the entirety of Pattern past
the point of mismatch in the text.

If there no mismatch then the pattern matches with text block.
If there is no mismatch then the pattern matches with text block.

Time Complexity : O(n/m) in the best case
n=length of main string
m=length of pattern string
"""

from __future__ import annotations


class BoyerMooreSearch:
"""
Example usage:

bms = BoyerMooreSearch(text="ABAABA", pattern="AB")
positions = bms.bad_character_heuristic()

where 'positions' contains the locations where the pattern was matched.
"""

def __init__(self, text: str, pattern: str):
self.text, self.pattern = text, pattern
self.textLen, self.patLen = len(text), len(pattern)

def match_in_pattern(self, char: str) -> int:
"""finds the index of char in pattern in reverse order
"""
Finds the index of char in pattern in reverse order.

Parameters :
char (str): character to be searched

Returns :
i (int): index of char from last in pattern
-1 (int): if char is not found in pattern

>>> bms = BoyerMooreSearch(text="ABAABA", pattern="AB")
>>> bms.match_in_pattern("B")
1
"""

for i in range(self.patLen - 1, -1, -1):
Expand All @@ -44,15 +56,19 @@ def match_in_pattern(self, char: str) -> int:

def mismatch_in_text(self, current_pos: int) -> int:
"""
find the index of mis-matched character in text when compared with pattern
from last
Find the index of mis-matched character in text when compared with pattern
from last.

Parameters :
current_pos (int): current index position of text

Returns :
i (int): index of mismatched char from last in text
-1 (int): if there is no mismatch between pattern and text block

>>> bms = BoyerMooreSearch(text="ABAABA", pattern="AB")
>>> bms.mismatch_in_text(2)
3
"""

for i in range(self.patLen - 1, -1, -1):
Expand All @@ -61,7 +77,14 @@ def mismatch_in_text(self, current_pos: int) -> int:
return -1

def bad_character_heuristic(self) -> list[int]:
# searches pattern in text and returns index positions
"""
Finds the positions in the text where the pattern occurs.

>>> bms = BoyerMooreSearch(text="ABAABA", pattern="AB")
>>> bms.bad_character_heuristic()
[0, 3]
"""

positions = []
for i in range(self.textLen - self.patLen + 1):
mismatch_index = self.mismatch_in_text(i)
Expand All @@ -75,13 +98,7 @@ def bad_character_heuristic(self) -> list[int]:
return positions


text = "ABAABA"
pattern = "AB"
bms = BoyerMooreSearch(text, pattern)
positions = bms.bad_character_heuristic()
if __name__ == "__main__":
import doctest

if len(positions) == 0:
print("No match found")
else:
print("Pattern found in following positions: ")
print(positions)
doctest.testmod()