Skip to content

Commit d9a69f4

Browse files
committed
[Feature] Added duplicate finding with multiple strategies: O(n) set-based and two-pointer scans, plus an O(m log n) binary-search variant.
1 parent db80c8c commit d9a69f4

File tree

3 files changed

+68
-0
lines changed

3 files changed

+68
-0
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ $ coverage report -m
3535
* [Breadth First Search](searches/breadth_first_search.py)
3636
* [2D Grid BFS](searches/breadth_first_search.py)
3737
* [Graph BFS](searches/breadth_first_search.py)
38+
* [Finding Duplicates](searches/find_duplicates.py)
3839

3940
### Data Structures
4041
* [LRU Cache](structures/lru_cache.py)

searches/find_duplicates.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
from searches.binary_search import binary_search
2+
3+
def duplicates_linear(arr1, arr2):
    """
    Return the elements of arr2 that also occur in arr1.

    The result follows arr2's order and keeps any repeats present in arr2.
    Elements of arr1 must be hashable because they are collected into a set
    for constant-time membership checks.
    """
    lookup = set(arr1)
    return [candidate for candidate in arr2 if candidate in lookup]
12+
13+
def duplicates_pre_sorted(arr1, arr2):
    """
    Collect the values shared by arr1 and arr2 in one forward sweep.

    Two cursors walk the inputs in lockstep: on a match both advance, otherwise
    the cursor pointing at the smaller value advances. Because the cursors
    never move backwards, all shared values are found only when both inputs
    are sorted in ascending order; unsorted inputs may miss matches.
    """
    matches = []
    i = 0
    j = 0
    size_1 = len(arr1)
    size_2 = len(arr2)

    while i < size_1 and j < size_2:
        left = arr1[i]
        right = arr2[j]

        if right == left:
            matches.append(left)
            i += 1
            j += 1
            continue

        # Advance whichever side currently holds the smaller value.
        if right > left:
            i += 1
        else:
            j += 1

    return matches
31+
32+
def duplicates_bin_search(arr1, arr2):
    """
    Find the elements common to two sequences when one is much larger than the other.

    Every element of the shorter sequence is looked up in the longer one with
    a binary search, so the longer sequence must be sorted in ascending order.

    Returns a list of the shared elements, in the order they appear in the
    shorter sequence (arr1 when both have the same length).
    """
    # Function-scope import so this block carries its own dependency.
    from bisect import bisect_left

    # Iterate over the shorter input and binary-search the longer one.
    if len(arr2) < len(arr1):
        arr1, arr2 = arr2, arr1

    output = []
    haystack_size = len(arr2)

    for element in arr1:
        position = bisect_left(arr2, element)
        # Compare the value at the insertion point explicitly: a hit at
        # index 0 is still a hit (a `> 0` test on a search index drops it).
        if position < haystack_size and arr2[position] == element:
            output.append(element)

    return output

tests/test_find_duplicates.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import unittest
2+
from searches.find_duplicates import duplicates_linear, duplicates_pre_sorted, duplicates_bin_search
3+
4+
class TestFindDuplicates(unittest.TestCase):
    """Exercise each duplicate-finding strategy from searches.find_duplicates."""

    def test_linear(self):
        """The set-based scan reports matches in the order of the second input."""
        self.assertEqual([1,2,3], duplicates_linear([1,2,3,4,5,6],[1,2,3,7,8,9]))
        self.assertEqual([4,1,3,2], duplicates_linear([4,5,7,1,9,2,10,3],[4,90,23,1,53,3,2,22]))

    def test_pre_sorted(self):
        """The two-pointer sweep works on sorted input and misses matches otherwise."""
        self.assertEqual([1,2,3], duplicates_pre_sorted([1,2,3,4,5,6],[1,2,3,7,8,9]))
        self.assertNotEqual([1,2,3], duplicates_pre_sorted([5,7,1,9,2,10,3],[4,90,23,1,53,3,2,22]))

    def test_bin_search(self):
        """The binary-search strategy finds matches, including one at index 0."""
        # These cases previously called duplicates_linear, leaving
        # duplicates_bin_search without any coverage.
        self.assertEqual([1,2,3], duplicates_bin_search([1,2,3,4,5,6],[1,2,3,7,8,9]))
        self.assertEqual([1,2,3], duplicates_bin_search([1,2,3,4,5,6],[1,2,3,7,8,9,10,11,12,13,14,15]))

    def test_bin_search_second_shorter(self):
        """Argument order does not matter: the shorter input is always the one iterated."""
        self.assertEqual([1,2,3], duplicates_bin_search([1,2,3,7,8,9,10,11,12,13,14,15],[1,2,3,4,5,6]))

0 commit comments

Comments
 (0)