@@ -7,11 +7,12 @@

 from __future__ import annotations

-from abc import ABC
+from abc import ABC, abstractmethod
 from concurrent.futures import Future
 from importlib import import_module
 from typing import Any, Literal, Optional, TYPE_CHECKING

+import cloudpickle
 from pyiron_snippets.colors import SeabornColors
 from pyiron_snippets.dotdict import DotDict

@@ -152,8 +153,8 @@ class Node(

     This is an abstract class.
     Children *must* define how :attr:`inputs` and :attr:`outputs` are constructed,
-    what will happen :meth:`on_run`, the :attr:`run_args` that will get passed to
-    :meth:`on_run`, and how to :meth:`process_run_result` once :meth:`on_run` finishes.
+    what will happen in :meth:`_on_run`, the :attr:`run_args` that will get passed to
+    :meth:`_on_run`, and how to :meth:`process_run_result` once :meth:`_on_run` finishes.
     They may optionally add additional signal channels to the signals IO.

     Attributes:
@@ -192,6 +193,9 @@ class Node(
         autoload (Literal["pickle"] | StorageInterface | None): Whether to check
             for a matching saved node and what storage back end to use to do so (no
             auto-loading if the back end is `None`.)
+        _serialize_result (bool): (IN DEVELOPMENT) Cloudpickle the output of running
+            the node; this is useful if the run is happening in a parallel process and
+            the parent process may be killed before it is finished. (Default is False.)
         signals (pyiron_workflow.io.Signals): A container for input and output
             signals, which are channels for controlling execution flow. By default, has
             a :attr:`signals.inputs.run` channel which has a callback to the
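
For orientation, here is a minimal usage sketch of the in-development flag documented above; `SomeNode` and the reload step are hypothetical stand-ins, and only attributes visible in this diff are assumed:

```python
# Hypothetical sketch: SomeNode stands in for any concrete Node subclass.
node = SomeNode(label="expensive_calculation")
node._serialize_result = True  # opt in: the run result gets cloudpickled to disk
node.run()  # on_run() writes run_result.tmp in the node's directory

# If the parent process is killed mid-run, a later process can reload the node
# and call run() again; finding run_result.tmp, it finishes from the saved
# result instead of recomputing.
```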
@@ -218,7 +222,7 @@ class Node(
             its internal structure.
         execute: An alias for :meth:`run`, but with flags to run right here, right now,
             and with the input it currently has.
-        on_run: **Abstract.** Do the thing. What thing must be specified by child
+        _on_run: **Abstract.** Do the thing. What thing must be specified by child
             classes.
         pull: An alias for :meth:`run` that runs everything upstream, then runs this
             node (but doesn't fire off the `ran` signal, so nothing happens farther
@@ -227,7 +231,7 @@ class Node(
             object is encountered).
         replace_with: If the node belongs to a parent, attempts to replace itself in
             that parent with a new provided node.
-        run: Run the node function from :meth:`on_run`. Handles status automatically.
+        run: Run the node function from :meth:`_on_run`. Handles status automatically.
             Various execution options are available as boolean flags.
         set_input_values: Allows input channels' values to be updated without any
             running.
@@ -290,6 +294,10 @@ def __init__(
         )
         self.checkpoint = checkpoint
         self.recovery: Literal["pickle"] | StorageInterface | None = "pickle"
+        self._serialize_result = False  # Advertised, but private to indicate
+        # under-development status -- API may change to be more user-friendly
+        self._do_clean: bool = False  # Power-user override for cleaning up temporary
+        # serialized results and empty directories (or not).
         self._cached_inputs = None
         self._user_data = {}  # A place for power-users to bypass node-injection

@@ -373,6 +381,29 @@ def _readiness_error_message(self) -> str:
             f" conform to type hints.\n" + self.readiness_report
         )

+    def on_run(self, *args, **kwargs) -> Any:
+        save_result: bool = args[0]
+        args = args[1:]
+        result = self._on_run(*args, **kwargs)
+        if save_result:
+            self._temporary_result_pickle(result)
+        return result
+
+    @abstractmethod
+    def _on_run(self, *args, **kwargs) -> Any:
+        pass
+
+    @property
+    def run_args(self) -> tuple[tuple, dict]:
+        args, kwargs = self._run_args
+        args = (self._serialize_result,) + args
+        return args, kwargs
+
+    @property
+    @abstractmethod
+    def _run_args(self, *args, **kwargs) -> Any:
+        pass
+
     def run(
         self,
         *args,
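
To make the new abstract split concrete, a minimal (hypothetical) child could look like the sketch below; `AddOne`, its `x` input channel, and the omission of the class's other abstract members are all illustrative, so this is not instantiable as written:

```python
class AddOne(Node):  # other abstract members (IO construction, etc.) omitted
    def _on_run(self, x):
        return x + 1

    @property
    def _run_args(self):
        return (self.inputs.x.value,), {}  # assumes an input channel named "x"

# The public pair then threads the flag through: run_args evaluates to
# ((node._serialize_result, x_value), {}), and on_run() strips the leading
# bool off before delegating to _on_run().
```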
@@ -431,6 +462,22 @@ def run(
             Kwargs updating input channel values happens _first_ and will get
             overwritten by any subsequent graph-based data manipulation.
         """
+        if self.running and self._serialize_result:
+            if self._temporary_result_file.is_file():
+                return self._finish_run(
+                    self._temporary_result_unpickle(),
+                    raise_run_exceptions=raise_run_exceptions,
+                    run_exception_kwargs={},
+                    run_finally_kwargs={
+                        "emit_ran_signal": emit_ran_signal,
+                        "raise_run_exceptions": raise_run_exceptions,
+                    },
+                )
+            else:
+                raise ValueError(
+                    f"{self.full_label} is still waiting for a serialized result"
+                )
+
         self.set_input_values(*args, **kwargs)

         return super().run(
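
With this guard, calling `run()` on a node that is still flagged as running with `_serialize_result` set either resumes from the temporary file or refuses loudly. A hedged sketch of the intended recovery pattern (the reload step is assumed, not shown in this diff):

```python
node = reload_node()  # hypothetical: restore a node whose process died mid-run
if node.running and node._serialize_result:
    try:
        result = node.run()  # finishes from run_result.tmp if it already exists
    except ValueError:
        result = None  # the serialized result has not landed on disk yet
```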
@@ -520,6 +567,9 @@ def _run_finally(self, /, emit_ran_signal: bool, raise_run_exceptions: bool):
                 backend=self.recovery, filename=self.as_path().joinpath("recovery")
             )

+        if self._do_clean:
+            self._clean_graph_directory()
+
     def run_data_tree(self, run_parent_trees_too=False) -> None:
         """
         Use topological analysis to build a tree of all upstream dependencies and run
@@ -628,6 +678,21 @@ def cache_hit(self):
         except:
             return False

+    @property
+    def _temporary_result_file(self):
+        return self.as_path().joinpath("run_result.tmp")
+
+    def _temporary_result_pickle(self, results):
+        self._temporary_result_file.parent.mkdir(parents=True, exist_ok=True)
+        self._temporary_result_file.touch(exist_ok=False)
+        with self._temporary_result_file.open("wb") as f:
+            cloudpickle.dump(results, f)
+
+    def _temporary_result_unpickle(self):
+        with self._temporary_result_file.open("rb") as f:
+            results = cloudpickle.load(f)
+        return results
+
     def _outputs_to_run_return(self):
         return DotDict(self.outputs.to_value_dict())

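
The three helpers above amount to a cloudpickle round-trip through a temporary file. A self-contained sketch of the same pattern (path and payload are illustrative):

```python
import cloudpickle
from pathlib import Path

tmp = Path("my_node_dir") / "run_result.tmp"
tmp.parent.mkdir(parents=True, exist_ok=True)
with tmp.open("wb") as f:
    # cloudpickle, unlike stdlib pickle, also handles lambdas and local functions
    cloudpickle.dump({"energy": -1.23}, f)
with tmp.open("rb") as f:
    results = cloudpickle.load(f)
assert results == {"energy": -1.23}
```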
@@ -994,6 +1059,22 @@ def report_import_readiness(self, tabs=0, report_so_far=""):
             f"{'ok' if self.import_ready else 'NOT IMPORTABLE'}"
         )

+    def _clean_graph_directory(self):
+        """
+        Delete the temporary results file (if any), and then go from this node's
+        semantic directory up to its semantic root's directory removing any empty
+        directories. Note: doesn't do a sophisticated walk, so sibling empty
+        directories will cause a parent to identify as non-empty.
+        """
+        self._temporary_result_file.unlink(missing_ok=True)
+
+        # Remove empty parent directories, bottom-up
+        root_directory = self.semantic_root.as_path().parent
+        for parent in self._temporary_result_file.parents:
+            if parent == root_directory or not parent.exists() or any(parent.iterdir()):
+                break
+            parent.rmdir()
+
     def display_state(self, state=None, ignore_private=True):
         state = dict(self.__getstate__()) if state is None else state
         if self.parent is not None:
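
The cleanup walk is a small pathlib idiom that generalizes beyond this class. A standalone sketch with the same stop conditions (stop at the root, at a missing directory, or at the first non-empty one):

```python
from pathlib import Path

def remove_file_and_empty_parents(file: Path, root: Path) -> None:
    """Delete `file`, then prune now-empty directories from it up toward `root`."""
    file.unlink(missing_ok=True)
    for parent in file.parents:
        if parent == root or not parent.exists() or any(parent.iterdir()):
            break  # an empty sibling still keeps the shared parent non-empty
        parent.rmdir()
```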