Skip to content

Commit 1a6a8b8

Browse files
Merge pull request #77 from python-thread/perf/return-generator
Make chunking more memory efficient
2 parents 9217bbc + 214a6c5 commit 1a6a8b8

File tree

2 files changed

+19
-16
lines changed

2 files changed

+19
-16
lines changed

src/thread/utils/algorithm.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,12 @@
88
|_ b.py
99
"""
1010

11-
from typing import List, Tuple
11+
from typing import Tuple, Generator
1212

1313

14-
def chunk_split(dataset_length: int, number_of_chunks: int) -> List[Tuple[int, int]]:
14+
def chunk_split(
15+
dataset_length: int, number_of_chunks: int
16+
) -> Generator[Tuple[int, int], None, None]:
1517
"""
1618
Splits a dataset into balanced chunks
1719
@@ -27,7 +29,7 @@ def chunk_split(dataset_length: int, number_of_chunks: int) -> List[Tuple[int, i
2729
2830
Returns
2931
-------
30-
:returns list[tuple[int, int]]: The chunked dataset slices
32+
:returns Generator[tuple[int, int], None, None]: The chunked dataset slices
3133
3234
Raises
3335
------
@@ -41,13 +43,10 @@ def chunk_split(dataset_length: int, number_of_chunks: int) -> List[Tuple[int, i
4143
overflow = dataset_length % number_of_chunks
4244

4345
i = 0
44-
split = []
4546
while i < dataset_length:
4647
chunk_length = chunk_count + int(overflow > 0)
4748
b = i + chunk_length
4849

49-
split.append((i, b))
50+
yield (i, b)
5051
overflow -= 1
5152
i = b
52-
53-
return split

tests/test_algorithm.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,23 @@
11
import random
2+
from typing import Generator
3+
24
from src.thread.utils import algorithm
35

46

7+
def test_type():
    # chunk_split must be lazy: calling it yields a generator, not a list.
    produced = algorithm.chunk_split(5, 1)
    assert isinstance(produced, Generator)
9+
10+
511
def test_chunking_1():
    # A single chunk covers the whole dataset as one (start, stop) slice.
    chunks = list(algorithm.chunk_split(5, 1))
    assert chunks == [(0, 5)]
713

814

915
def test_chunking_2():
    # 5 items over 2 chunks: the remainder item goes to the first chunk (3 + 2).
    chunks = list(algorithm.chunk_split(5, 2))
    assert chunks == [(0, 3), (3, 5)]
1117

1218

1319
def test_chunking_3():
14-
assert algorithm.chunk_split(100, 8) == [
20+
assert list(algorithm.chunk_split(100, 8)) == [
1521
(0, 13),
1622
(13, 26),
1723
(26, 39),
@@ -31,15 +37,13 @@ def test_chunking_dynamic():
3137
expected_chunk_high = dataset_length % thread_count
3238

3339
i = 0
34-
heap = []
40+
gen = algorithm.chunk_split(dataset_length, thread_count)
3541
while i < dataset_length:
3642
chunk_length = expected_chunk_length_low + int(expected_chunk_high > 0)
3743
b = i + chunk_length
3844

39-
heap.append((i, b))
45+
assert (
46+
next(gen) == (i, b)
47+
), f'\nIndex: {i}\nLength: {dataset_length}\nThreads: {thread_count}\nExpected: {(i, b)}\nActual: {next(gen)}'
4048
expected_chunk_high -= 1
4149
i = b
42-
43-
assert (
44-
algorithm.chunk_split(dataset_length, thread_count) == heap
45-
), f'\nLength: {dataset_length}\nThreads: {thread_count}\nExpected: {heap}\nActual: {algorithm.chunk_split(dataset_length, thread_count)}'

0 commit comments

Comments
 (0)