Skip to content

Commit a20e4e5

Browse files
authored
Added parallel merge sort (#177)
1 parent 2ef83ae commit a20e4e5

File tree

4 files changed

+139
-7
lines changed

4 files changed

+139
-7
lines changed

pydatastructs/linear_data_structures/__init__.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22

33
from . import (
44
arrays,
5-
linked_lists
5+
linked_lists,
6+
algorithms
67
)
78

89
from .arrays import (
@@ -18,3 +19,8 @@
1819
DoublyCircularLinkedList
1920
)
2021
__all__.extend(linked_lists.__all__)
22+
23+
from .algorithms import (
24+
merge_sort_parallel
25+
)
26+
__all__.extend(algorithms.__all__)
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
from pydatastructs.linear_data_structures.arrays import (
2+
OneDimensionalArray, DynamicArray)
3+
from pydatastructs.utils.misc_util import _check_type
4+
from concurrent.futures import ThreadPoolExecutor
5+
from math import log, floor
6+
7+
__all__ = [
8+
'merge_sort_parallel'
9+
]
10+
11+
def _merge(array, sl, el, sr, er, end):
12+
l, r = [], []
13+
for i in range(sl, el + 1):
14+
if (i <= end and
15+
array[i] is not None):
16+
l.append(array[i])
17+
array[i] = None
18+
for i in range(sr, er + 1):
19+
if (i <= end and
20+
array[i] is not None):
21+
r.append(array[i])
22+
array[i] = None
23+
i, j, k = 0, 0, sl
24+
while i < len(l) and j < len(r):
25+
if l[i] <= r[j]:
26+
array[k] = l[i]
27+
i += 1
28+
else:
29+
array[k] = r[j]
30+
j += 1
31+
k += 1
32+
33+
while i < len(l):
34+
array[k] = l[i]
35+
i += 1
36+
k += 1
37+
38+
while j < len(r):
39+
array[k] = r[j]
40+
j += 1
41+
k += 1
42+
43+
def merge_sort_parallel(array, num_threads, **kwargs):
    """
    Implements parallel merge sort.

    The range ``[start, end]`` is sorted in place with a bottom-up
    merge sort. On every pass the independent pairs of adjacent runs
    are merged concurrently by a pool of worker threads.

    Parameters
    ==========

    array: Array
        The array which is to be sorted.
    num_threads: int
        The maximum number of threads
        to be used for sorting.
    start: int
        The starting index of the portion
        which is to be sorted.
        Optional, by default 0
    end: int
        The ending index of the portion which
        is to be sorted.
        Optional, by default ``array._size - 1``.

    Notes
    =====

    A range with fewer than two positions is left untouched.
    If ``array`` is a ``DynamicArray``, ``_modify(force=True)`` is
    called on it once sorting is finished.

    Examples
    ========

    >>> from pydatastructs import OneDimensionalArray, merge_sort_parallel
    >>> arr = OneDimensionalArray(int,[3, 2, 1])
    >>> merge_sort_parallel(arr, 3)
    >>> [arr[0], arr[1], arr[2]]
    [1, 2, 3]

    References
    ==========

    .. [1] https://en.wikipedia.org/wiki/Merge_sort
    """
    start = kwargs.get('start', 0)
    end = kwargs.get('end', array._size - 1)
    length = end - start + 1

    # Nothing to merge for empty or single-slot ranges; this also avoids
    # taking the logarithm of a non-positive length.
    if length > 1:
        # One pool serves every pass instead of being rebuilt per pass.
        with ThreadPoolExecutor(max_workers=num_threads) as executor:
            width = 1
            # Integer doubling replaces floor(log(length, 2)), which is
            # subject to floating point rounding.
            while width < length:
                # Queue every merge of this pass first so that the
                # workers can process them concurrently; each merge
                # touches a disjoint slice of the array.
                futures = [
                    executor.submit(
                        _merge,
                        array,
                        lo, lo + width - 1,
                        lo + width, lo + 2*width - 1,
                        end)
                    for lo in range(start, end + 1, 2*width)
                ]
                # Barrier: the next pass needs all runs of this pass
                # merged. ``result`` also re-raises worker exceptions.
                for future in futures:
                    future.result()
                width *= 2

    if _check_type(array, DynamicArray):
        array._modify(force=True)

pydatastructs/linear_data_structures/arrays.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -209,12 +209,12 @@ def __new__(cls, dtype=NoneType, *args, **kwargs):
209209
obj._last_pos_filled = obj._num - 1
210210
return obj
211211

212-
def _modify(self):
212+
def _modify(self, force=False):
213213
"""
214214
Contracts the array if Num(T)/Size(T) falls
215215
below load factor.
216216
"""
217-
if self._num/self._size < self._load_factor:
217+
if (self._num/self._size < self._load_factor) or force:
218218
arr_new = OneDimensionalArray(self._dtype, 2*self._num + 1)
219219
j = 0
220220
for i in range(self._last_pos_filled + 1):
@@ -231,14 +231,12 @@ def append(self, el):
231231
for i in range(self._last_pos_filled + 1):
232232
arr_new[i] = self[i]
233233
arr_new[self._last_pos_filled + 1] = el
234-
self._last_pos_filled += 1
235234
self._size = arr_new._size
236-
self._num += 1
237235
self._data = arr_new._data
238236
else:
239237
self[self._last_pos_filled + 1] = el
240-
self._last_pos_filled += 1
241-
self._num += 1
238+
self._last_pos_filled += 1
239+
self._num += 1
242240
self._modify()
243241

244242
def delete(self, idx):
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
from pydatastructs import (
2+
merge_sort_parallel, DynamicOneDimensionalArray,
3+
OneDimensionalArray)
4+
import random
5+
6+
def test_merge_sort_parallel():
    """
    Checks ``merge_sort_parallel`` on a sub-range of a dynamic array
    and of a fixed-size array, using a fixed random seed so that the
    generated data (and hence the expected layout) is reproducible.
    """
    random.seed(1000)

    # Case 1: dynamic array that had elements appended and then deleted.
    count = random.randint(10, 20)
    dynamic = DynamicOneDimensionalArray(int, 0)
    for _ in range(count):
        dynamic.append(random.randint(1, 1000))
    for _ in range(count//3):
        dynamic.delete(random.randint(0, count//2))
    expected_dynamic = [
        686, 779, 102, 134, 362,
        448, 480, 548, 228, 688,
        247, 373, 696,
    ] + [None] * 14
    merge_sort_parallel(dynamic, 5, start=2, end=10)
    assert dynamic._data == expected_dynamic

    # Case 2: fixed-size array, only indices 2..5 are sorted.
    count = random.randint(10, 20)
    fixed = OneDimensionalArray(int, count)
    for idx in range(count):
        fixed[idx] = random.randint(1, 1000)
    expected_fixed = [
        42, 695, 147, 500, 768, 998,
        473, 732, 728, 426, 709, 910,
    ]
    merge_sort_parallel(fixed, 5, start=2, end=5)
    assert fixed._data == expected_fixed

0 commit comments

Comments
 (0)