
Commit 62395cd

Merge pull request #25 from caffeine-addictt/dev
Removing numpy
2 parents: c0e5610 + 8fd7c07

6 files changed (+64 additions, -55 deletions)

poetry.lock

Lines changed: 1 addition & 46 deletions
Some generated files are not rendered by default.

pyproject.toml

Lines changed: 0 additions & 2 deletions
@@ -25,8 +25,6 @@ thread = "thread.__main__:app"
 
 [tool.poetry.dependencies]
 python = "^3.9"
-numpy = "^1.26.2"
-typing-extensions = "^4.8.0"
 typer = {extras = ["all"], version = "^0.9.0"}
 
 

src/thread/thread.py

Lines changed: 3 additions & 3 deletions
@@ -3,9 +3,9 @@
 import signal
 import threading
 
-import numpy
 from . import exceptions
 from .utils.config import Settings
+from .utils.algorithm import chunk_split
 
 from functools import wraps
 from typing import (
@@ -509,10 +509,10 @@ def start(self) -> None:
     name_format = self.overflow_kwargs.get('name') and self.overflow_kwargs['name'] + '%s'
     self.overflow_kwargs = { i: v for i,v in self.overflow_kwargs.items() if i != 'name' and i != 'args' }
 
-    for i, data_chunk in enumerate(numpy.array_split(self.dataset, max_threads)):
+    for i, data_chunk in enumerate(chunk_split(self.dataset, max_threads)):
       chunk_thread = Thread(
         target = self.function,
-        args = [i, data_chunk.tolist(), *parsed_args, *self.overflow_args],
+        args = [i, data_chunk, *parsed_args, *self.overflow_args],
         name = name_format and name_format % i or None,
         **self.overflow_kwargs
       )
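
For plain-list datasets the swap is behavior-preserving: `numpy.array_split` and the new `chunk_split` both give the first `len(dataset) % n` chunks one extra element, and the chunks now arrive as lists, which is why the `.tolist()` call could be dropped. A minimal parity sketch (hypothetical, not part of this commit, and assuming numpy is still available in a dev environment and the package is importable as `thread`):

```python
# Hypothetical parity check between the old and new chunking calls.
import numpy
from thread.utils.algorithm import chunk_split

dataset = list(range(10))
via_numpy = [chunk.tolist() for chunk in numpy.array_split(dataset, 4)]
via_helper = chunk_split(dataset, 4)
assert via_numpy == via_helper  # both yield [[0, 1, 2], [3, 4, 5], [6, 7], [8, 9]]
```

One difference worth noting: `numpy.array_split` pads with empty chunks when asked for more chunks than elements, while `chunk_split` raises an `AssertionError`; the scale-down of `max_threads` exercised by `test_threadsScaleDown` should keep that branch out of reach.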

src/thread/utils/__init__.py

Lines changed: 4 additions & 0 deletions
@@ -4,3 +4,7 @@
 
 from .logging_config import ColorLogger
 from .config import Settings
+
+from . import (
+  algorithm,
+)
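
With this re-export the helper is reachable through the subpackage as well; assuming the installed package is importable as `thread`, both access paths resolve to the same function:

```python
# Both import styles point at the same module attribute.
from thread.utils import algorithm
from thread.utils.algorithm import chunk_split

assert algorithm.chunk_split is chunk_split
```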

src/thread/utils/algorithm.py

Lines changed: 53 additions & 0 deletions
@@ -0,0 +1,53 @@
+"""
+This file shall host the shared algorithms
+
+If it gets too dense, we could consider splitting it into a library import
+|_ algorithm/
+  |_ __init__.py
+  |_ a.py
+  |_ b.py
+"""
+
+from typing import List, Sequence, Any
+
+
+def chunk_split(dataset: Sequence[Any], number_of_chunks: int) -> List[List[Any]]:
+  """
+  Splits a dataset into balanced chunks
+
+  If the size of the dataset is not fully divisible by the number of chunks, it is split like this
+  > `[ [n+1], [n+1], [n+1], [n], [n], [n] ]`
+
+
+  Parameters
+  ----------
+  :param dataset: This should be the dataset you want to split into chunks
+  :param number_of_chunks: This should be the number of chunks it will attempt to split into
+
+
+  Returns
+  -------
+  :returns list[list[Any]]: The split dataset
+
+  Raises
+  ------
+  AssertionError: The number of chunks specified is larger than the dataset size
+  """
+  length = len(dataset)
+  assert length >= number_of_chunks, 'The number of chunks specified is larger than the dataset size'
+
+  chunk_count = length // number_of_chunks
+  overflow = length % number_of_chunks
+
+  i = 0
+  split = []
+  while i < length:
+    chunk_length = chunk_count + int(overflow > 0)
+    b = i + chunk_length
+
+    split.append(dataset[i:b])
+    overflow -= 1
+    i = b
+
+  return split
+
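
A quick usage sketch of the balancing rule the docstring describes: with 10 items split 3 ways, 10 % 3 = 1 chunk carries the extra element, and asking for more chunks than items trips the assertion (assuming the package is importable as `thread`):

```python
from thread.utils.algorithm import chunk_split

print(chunk_split(list(range(10)), 3))
# [[0, 1, 2, 3], [4, 5, 6], [7, 8, 9]]

try:
    chunk_split([1, 2], 5)
except AssertionError as err:
    print(err)  # The number of chunks specified is larger than the dataset size
```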

tests/test_parallelprocessing.py

Lines changed: 3 additions & 4 deletions
@@ -1,5 +1,4 @@
 import time
-import numpy
 import pytest
 from src.thread import ParallelProcessing, exceptions
 
@@ -19,7 +18,7 @@ def _dummy_raiseException(x: Exception, delay: float = 0):
 # >>>>>>>>>> General Use <<<<<<<<<< #
 def test_threadsScaleDown():
   """This test is for testing if threads scale down `max_threads` when the dataset is smaller than the thread count"""
-  dataset = numpy.arange(0, 2).tolist()
+  dataset = list(range(0, 2))
   new = ParallelProcessing(
     function = _dummy_dataProcessor,
     dataset = dataset,
@@ -32,7 +31,7 @@ def test_threadsScaleDown():
 
 def test_threadsProcessing():
   """This test is for testing if threads correctly order data in the `dataset` arrangement"""
-  dataset = numpy.arange(0, 500).tolist()
+  dataset = list(range(0, 500))
   new = ParallelProcessing(
     function = _dummy_dataProcessor,
     dataset = dataset,
@@ -48,7 +47,7 @@ def test_threadsProcessing():
 # >>>>>>>>>> Raising Exceptions <<<<<<<<<< #
 def test_raises_StillRunningError():
   """This test should raise ThreadStillRunningError"""
-  dataset = numpy.arange(0, 8).tolist()
+  dataset = list(range(0, 8))
   new = ParallelProcessing(
     function = _dummy_dataProcessor,
     dataset = dataset,
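
The migrated tests only swap how the dataset is built; a follow-up test pinning down the chunking behavior itself (hypothetical, not part of this commit) could mirror their style:

```python
from src.thread.utils.algorithm import chunk_split


def test_chunkSplitBalanced():
  """Chunks should preserve order and differ in size by at most one"""
  dataset = list(range(0, 500))
  chunks = chunk_split(dataset, 8)

  assert len(chunks) == 8
  assert [x for chunk in chunks for x in chunk] == dataset
  assert max(map(len, chunks)) - min(map(len, chunks)) <= 1
```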
