Skip to content

Commit 6f61931

Browse files
[FIX] improve doc-strings
1 parent bcea3b6 commit 6f61931

File tree

4 files changed

+31
-2
lines changed

4 files changed

+31
-2
lines changed

autoPyTorch/utils/parallel.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,21 @@
33

44

55
def preload_modules(context: multiprocessing.context.BaseContext) -> None:
6+
"""
7+
This function is meant to be used with the forkserver multiprocessing context.
8+
More details about it can be found here:
9+
https://docs.python.org/3/library/multiprocessing.html
10+
11+
Forkserver is known to be slower than other contexts. We use it, because it helps
12+
reduce the probability of a deadlock. To make it fast, we pre-load modules so that
13+
forked children have the desired modules available.
14+
15+
We do not inherit dead-lock problematic modules like logging.
16+
17+
Arguments:
18+
context (multiprocessing.context.BaseContext): One of the three supported multiprocessing
19+
contexts being fork, forkserver or spawn.
20+
"""
621
all_loaded_modules = sys.modules.keys()
722
preload = [
823
loaded_module for loaded_module in all_loaded_modules

autoPyTorch/utils/single_thread_client.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,15 @@ def __del__(self) -> None:
3030

3131
class SingleThreadedClient(dask.distributed.Client):
3232
"""
33-
A class to Mock the Distributed Client class, in case
34-
Auto-Sklearn is meant to run in the current Thread.
33+
A class to Mock the Distributed Client class.
34+
35+
Using dask requires a scheduler which submits jobs on a different process. Also,
36+
pynisher submits jobs in a further additional process.
37+
38+
When using a single core, we would prefer using the same main process without any
39+
multiprocessing overhead (that is, without the need of a LocalCluster in
40+
dask.distributed.Client). In other words, this class enriches the Client() class
41+
with the capability to run a future in the same thread (without any deadlock).
3542
"""
3643
def __init__(self) -> None:
3744

test/test_pipeline/components/training/base.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ def prepare_trainer(self,
2323
trainer: BaseTrainerComponent,
2424
task_type: int,
2525
epochs=50):
26+
# make this test reproducible
27+
torch.manual_seed(42)
2628
if task_type in CLASSIFICATION_TASKS:
2729
X, y = make_classification(
2830
n_samples=n_samples,

test/test_utils/test_single_thread_client.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,5 +23,10 @@ def test_single_thread_client_like_dask_client():
2323
assert str(future) != ""
2424
assert str(single_thread_client) != ""
2525

26+
# Single thread client is an inherited version of dask client
27+
# so that futures run in the same thread as the main job.
28+
# We carefully selected what methods are inherited, and any other
29+
# method should raise a not implemented error to be safe of major
30+
# dask client api changes.
2631
with pytest.raises(NotImplementedError):
2732
single_thread_client.get_scheduler_logs()

0 commit comments

Comments
 (0)