File tree 4 files changed +31
-2
lines changed
test_pipeline/components/training 4 files changed +31
-2
lines changed Original file line number Diff line number Diff line change 3
3
4
4
5
5
def preload_modules (context : multiprocessing .context .BaseContext ) -> None :
6
+ """
7
+ This function is meant to be used with the forkserver multiprocessing context.
8
+ More details about it can be found here:
9
+ https://docs.python.org/3/library/multiprocessing.html
10
+
11
+ Forkserver is known to be slower than other contexts. We use it, because it helps
12
+ reduce the probability of a deadlock. To make it fast, we pre-load modules so that
13
+ forked children have the desired modules available.
14
+
15
+ We do not inherit dead-lock problematic modules like logging.
16
+
17
+ Arguments:
18
+ context (multiprocessing.context.BaseContext): One of the three supported multiprocessing
19
+ contexts being fork, forkserver or spawn.
20
+ """
6
21
all_loaded_modules = sys .modules .keys ()
7
22
preload = [
8
23
loaded_module for loaded_module in all_loaded_modules
Original file line number Diff line number Diff line change @@ -30,8 +30,15 @@ def __del__(self) -> None:
30
30
31
31
class SingleThreadedClient (dask .distributed .Client ):
32
32
"""
33
- A class to Mock the Distributed Client class, in case
34
- Auto-Sklearn is meant to run in the current Thread.
33
+ A class to Mock the Distributed Client class.
34
+
35
+ Using dask requires a scheduler which submits jobs on a different process. Also,
36
+ pynisher submits jobs in a further additional process.
37
+
38
+ When using a single core, we would prefer using the same main process without any
39
+ multiprocessing overhead (that is, without the need of a LocalCluster in
40
+ dask.distributed.Client). In other words, this class enriches the Client() class
41
+ with the capability to run a future in the same thread (without any deadlock).
35
42
"""
36
43
def __init__ (self ) -> None :
37
44
Original file line number Diff line number Diff line change @@ -23,6 +23,8 @@ def prepare_trainer(self,
23
23
trainer : BaseTrainerComponent ,
24
24
task_type : int ,
25
25
epochs = 50 ):
26
+ # make this test reproducible
27
+ torch .manual_seed (42 )
26
28
if task_type in CLASSIFICATION_TASKS :
27
29
X , y = make_classification (
28
30
n_samples = n_samples ,
Original file line number Diff line number Diff line change @@ -23,5 +23,10 @@ def test_single_thread_client_like_dask_client():
23
23
assert str (future ) != ""
24
24
assert str (single_thread_client ) != ""
25
25
26
+ # Single thread client is an inherited version of dask client
27
+ # so that futures run in the same thread as the main job.
28
+ # We carefully selected what methods are inherited, and any other
29
+ # method should raise a not implemented error to be safe of major
30
+ # dask client api changes.
26
31
with pytest .raises (NotImplementedError ):
27
32
single_thread_client .get_scheduler_logs ()
You can’t perform that action at this time.
0 commit comments