
Commit 62395cd

Merge pull request #25 from caffeine-addictt/dev
Removing numpy
2 parents: c0e5610 + 8fd7c07

6 files changed (+64 additions, -55 deletions)

poetry.lock

Lines changed: 1 addition & 46 deletions
Some generated files are not rendered by default.

pyproject.toml

Lines changed: 0 additions & 2 deletions
@@ -25,8 +25,6 @@ thread = "thread.__main__:app"
 
 [tool.poetry.dependencies]
 python = "^3.9"
-numpy = "^1.26.2"
-typing-extensions = "^4.8.0"
 typer = {extras = ["all"], version = "^0.9.0"}
 
 

src/thread/thread.py

Lines changed: 3 additions & 3 deletions
@@ -3,9 +3,9 @@
 import signal
 import threading
 
-import numpy
 from . import exceptions
 from .utils.config import Settings
+from .utils.algorithm import chunk_split
 
 from functools import wraps
 from typing import (
@@ -509,10 +509,10 @@ def start(self) -> None:
     name_format = self.overflow_kwargs.get('name') and self.overflow_kwargs['name'] + '%s'
     self.overflow_kwargs = { i: v for i,v in self.overflow_kwargs.items() if i != 'name' and i != 'args' }
 
-    for i, data_chunk in enumerate(numpy.array_split(self.dataset, max_threads)):
+    for i, data_chunk in enumerate(chunk_split(self.dataset, max_threads)):
       chunk_thread = Thread(
         target = self.function,
-        args = [i, data_chunk.tolist(), *parsed_args, *self.overflow_args],
+        args = [i, data_chunk, *parsed_args, *self.overflow_args],
         name = name_format and name_format % i or None,
         **self.overflow_kwargs
       )
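
For plain-list datasets the swap is behavior-preserving: `numpy.array_split` and the new `chunk_split` both give the first `len(dataset) % n` chunks one extra element, and the chunks now arrive as lists, which is why the `.tolist()` call could be dropped. A minimal parity sketch (hypothetical, not part of this commit, and assuming numpy is still available in a dev environment and the package is importable as `thread`):

```python
# Hypothetical parity check between the old and new chunking calls.
import numpy
from thread.utils.algorithm import chunk_split

dataset = list(range(10))
via_numpy = [chunk.tolist() for chunk in numpy.array_split(dataset, 4)]
via_helper = chunk_split(dataset, 4)
assert via_numpy == via_helper  # both yield [[0, 1, 2], [3, 4, 5], [6, 7], [8, 9]]
```

One difference worth noting: `numpy.array_split` pads with empty chunks when asked for more chunks than elements, while `chunk_split` raises an `AssertionError`; the scale-down of `max_threads` exercised by `test_threadsScaleDown` should keep that branch out of reach.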

src/thread/utils/__init__.py

Lines changed: 4 additions & 0 deletions
@@ -4,3 +4,7 @@
 
 from .logging_config import ColorLogger
 from .config import Settings
+
+from . import (
+  algorithm,
+)
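
With this re-export the helper is reachable through the subpackage as well; assuming the installed package is importable as `thread`, both access paths resolve to the same function:

```python
# Both import styles point at the same module attribute.
from thread.utils import algorithm
from thread.utils.algorithm import chunk_split

assert algorithm.chunk_split is chunk_split
```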

src/thread/utils/algorithm.py

Lines changed: 53 additions & 0 deletions
@@ -0,0 +1,53 @@
+"""
+This file shall host the shared algorithms
+
+If it gets too dense, we could consider splitting it into a library import
+|_ algorithm/
+  |_ __init__.py
+  |_ a.py
+  |_ b.py
+"""
+
+from typing import List, Sequence, Any
+
+
+def chunk_split(dataset: Sequence[Any], number_of_chunks: int) -> List[List[Any]]:
+  """
+  Splits a dataset into balanced chunks
+
+  If the size of the dataset is not fully divisible by the number of chunks, it is split like this
+  > `[ [n+1], [n+1], [n+1], [n], [n], [n] ]`
+
+
+  Parameters
+  ----------
+  :param dataset: This should be the dataset you want to split into chunks
+  :param number_of_chunks: This should be the number of chunks it will attempt to split into
+
+
+  Returns
+  -------
+  :returns list[list[Any]]: The split dataset
+
+  Raises
+  ------
+  AssertionError: The number of chunks specified is larger than the dataset size
+  """
+  length = len(dataset)
+  assert length >= number_of_chunks, 'The number of chunks specified is larger than the dataset size'
+
+  chunk_count = length // number_of_chunks
+  overflow = length % number_of_chunks
+
+  i = 0
+  split = []
+  while i < length:
+    chunk_length = chunk_count + int(overflow > 0)
+    b = i + chunk_length
+
+    split.append(dataset[i:b])
+    overflow -= 1
+    i = b
+
+  return split
+
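
A quick usage sketch of the balancing rule the docstring describes: with 10 items split 3 ways, 10 % 3 = 1 chunk carries the extra element, and asking for more chunks than items trips the assertion (assuming the package is importable as `thread`):

```python
from thread.utils.algorithm import chunk_split

print(chunk_split(list(range(10)), 3))
# [[0, 1, 2, 3], [4, 5, 6], [7, 8, 9]]

try:
    chunk_split([1, 2], 5)
except AssertionError as err:
    print(err)  # The number of chunks specified is larger than the dataset size
```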

tests/test_parallelprocessing.py

Lines changed: 3 additions & 4 deletions
@@ -1,5 +1,4 @@
 import time
-import numpy
 import pytest
 from src.thread import ParallelProcessing, exceptions
 
@@ -19,7 +18,7 @@ def _dummy_raiseException(x: Exception, delay: float = 0):
 # >>>>>>>>>> General Use <<<<<<<<<< #
 def test_threadsScaleDown():
   """This test is for testing if threads scale down `max_threads` when the dataset is smaller than the thread count"""
-  dataset = numpy.arange(0, 2).tolist()
+  dataset = list(range(0, 2))
   new = ParallelProcessing(
     function = _dummy_dataProcessor,
     dataset = dataset,
@@ -32,7 +31,7 @@ def test_threadsScaleDown():
 
 def test_threadsProcessing():
   """This test is for testing if threads correctly order data in the `dataset` arrangement"""
-  dataset = numpy.arange(0, 500).tolist()
+  dataset = list(range(0, 500))
   new = ParallelProcessing(
     function = _dummy_dataProcessor,
     dataset = dataset,
@@ -48,7 +47,7 @@ def test_threadsProcessing():
 # >>>>>>>>>> Raising Exceptions <<<<<<<<<< #
 def test_raises_StillRunningError():
   """This test should raise ThreadStillRunningError"""
-  dataset = numpy.arange(0, 8).tolist()
+  dataset = list(range(0, 8))
   new = ParallelProcessing(
     function = _dummy_dataProcessor,
     dataset = dataset,
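
The migrated tests only swap how the dataset is built; a follow-up test pinning down the chunking behavior itself (hypothetical, not part of this commit) could mirror their style:

```python
from src.thread.utils.algorithm import chunk_split


def test_chunkSplitBalanced():
  """Chunks should preserve order and differ in size by at most one"""
  dataset = list(range(0, 500))
  chunks = chunk_split(dataset, 8)

  assert len(chunks) == 8
  assert [x for chunk in chunks for x in chunk] == dataset
  assert max(map(len, chunks)) - min(map(len, chunks)) <= 1
```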
